Arnaud Michel
2014-Feb-06 13:21 UTC
[R] To map population of European countries from Eurostat
Hello I would like to map the population of the European countries in 2011. I am using the spatial shapefiles of Europe published by EUROSTAT. I applyed the script below of Markus Kainubut but I had a problem with the map. Any ideas ? Thanks for your help ######################################### # Importing the data library(SmarterPoland) df <- getEurostatRaw(kod = "tps00001") # rename 1ere variable names(df) <- c("xx", 2002:2013) # new variable df$unit et df$geo.time df$unit <- lapply(strsplit(as.character(df$xx), ","), "[", 1) df$geo.time <- lapply(strsplit(as.character(df$xx), ","), "[", 2) # file df.l df.l <- melt(data = df, id.vars = "geo.time", measure.vars = c("2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013")) df.l$geo.time <- unlist(df.l$geo.time) # unlist the geo.time variable # Load the shapefile GISCO, subset it to NUTS2-level # and merge the Eurostat # attribute data with it into single SpatialPolygonDataFrame Filetodownload <- "http://epp.eurostat.ec.europa.eu/cache/GISCO/geodatafiles/NUTS_2010_60M_SH.zip" download.file(Filetodownload, destfile = "NUTS_2010_60M_SH.zip") rm(Filetodownload) # Unzip NUTS_2010_60M_SH.zip to SpatialPolygonsDataFrame unzip("NUTS_2010_60M_SH.zip") library(rgdal) # Lecture du fichier map <- readOGR(dsn = "./NUTS_2010_60M_SH/data", layer = "NUTS_RG_60M_2010") # OGR data source with driver: ESRI Shapefile # Source: "./NUTS_2010_60M_SH/data", layer: "NUTS_RG_60M_2010" # with 1920 features and 4 fields # Feature type: wkbPolygon with 2 dimensions names(map) #[1] "NUTS_ID" "STAT_LEVL_" "SHAPE_Leng" "SHAPE_Area" # as the data is at NUTS2-level, we subset the # spatialpolygondataframe map_nuts2 <- subset(map, STAT_LEVL_ <= 2) # dim show how many regions are in the spatialpolygondataframe dim(map_nuts2) ## [1] 467 4 # dim show how many regions are in the data.frame dim(df) ## [1] 43 15 ################################################################ # Spatial dataframe has 467 rows and attribute data 43. We need to make # attribute data to have similar number of rows NUTS_ID <- as.character(map_nuts2$NUTS_ID) VarX <- rep("empty", 467) dat <- data.frame(NUTS_ID, VarX) # then we shall merge this with Eurostat data.frame dat2 <- merge(dat, df, by.x = "NUTS_ID", by.y = "geo.time", all.x = TRUE) ## merge this manipulated attribute data with the spatialpolygondataframe ## there are still duplicates in the data, remove them dat2$dup <- duplicated(dat2$NUTS_ID) dat3 <- subset(dat2, dup == FALSE) ## rownames row.names(dat3) <- dat3$NUTS_ID row.names(map_nuts2) <- as.character(map_nuts2$NUTS_ID) ## order data dat3 <- dat3[order(row.names(dat3)), ] map_nuts2 <- map_nuts2[order(row.names(map_nuts2)), ] ## join library(maptools) shape <- spCbind(map_nuts2, dat3) ########################################################################## # Munging the shapefile into data.frame and ready for ggplot-plotting ## fortify spatialpolygondataframe into data.frame library(ggplot2) library(rgeos) shape$id <- rownames(shape at data) map.points <- fortify(shape, region = "id") map.df <- merge(map.points, shape, by = "id") ############################################################### # Only year 2011 # As we want to plot map faceted by years from 2003 to 2011 we have to # melt it into long format map.df <- map.df[, -c(15:23,25,26)] library(reshape2) map.df.l <- melt(data = map.df, id.vars = c("id", "long", "lat", "group"), measure.vars = c("X2011")) # year variable (variable) is class string and type X20xx. Lets remove # the X and convert it to numerical library(stringr) map.df.l$variable <- str_replace_all(map.df.l$variable, "X", "") map.df.l$variable <- factor(map.df.l$variable) map.df.l$variable <- as.numeric(levels(map.df.l$variable))[map.df.l$variable] # And finally the plot using ggplot2 #Map shows proportion of materially deprived households at the NUTS2 level. #Grey color indicates missing data. library(ggplot2) ggplot(map.df.l, aes(long,lat,group=group)) + geom_polygon(aes(fill = value)) + geom_polygon(data = map.df.l, aes(long,lat), fill=NA, color = "white", size=0.1) + # white borders coord_map(project="orthographic", xlim=c(-22,34), ylim=c(35,70)) + # proj labs(title = "Cartographie") + theme_minimal() -- Michel ARNAUD Charg? de mission aupr?s du DRH DGDRD-Drh - TA 174/04 Av Agropolis 34398 Montpellier cedex 5 tel : 04.67.61.75.38 fax : 04.67.61.57.87 port: 06.47.43.55.31
Jon Olav Skoien
2014-Feb-07 09:38 UTC
[R] To map population of European countries from Eurostat
Michel, I am no expert on ggplot2 and cannot explain why, but it seems the NAs in map.df.l$value cause some problems. Try to remove them before plotting or examine your code to figure out why you get them. They seem to be on the borders between NUTS2-level objects. Best wishes, Jon On 06-Feb-14 14:21, Arnaud Michel wrote:> Hello > I would like to map the population of the European countries in 2011. > I am using the spatial shapefiles of Europe published by EUROSTAT. > I applyed the script below of Markus Kainubut but I had a problem with > the map. > > Any ideas ? > Thanks for your help > > ######################################### > # Importing the data > library(SmarterPoland) > df <- getEurostatRaw(kod = "tps00001") > > # rename 1ere variable > names(df) <- c("xx", 2002:2013) > > # new variable df$unit et df$geo.time > df$unit <- lapply(strsplit(as.character(df$xx), ","), "[", 1) > df$geo.time <- lapply(strsplit(as.character(df$xx), ","), "[", 2) > > # file df.l > df.l <- melt(data = df, id.vars = "geo.time", measure.vars = c("2002", > "2003", > "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", > "2012", "2013")) > > df.l$geo.time <- unlist(df.l$geo.time) # unlist the geo.time variable > > # Load the shapefile GISCO, subset it to NUTS2-level > # and merge the Eurostat > # attribute data with it into single SpatialPolygonDataFrame > > Filetodownload <- > "http://epp.eurostat.ec.europa.eu/cache/GISCO/geodatafiles/NUTS_2010_60M_SH.zip" > download.file(Filetodownload, destfile = "NUTS_2010_60M_SH.zip") > rm(Filetodownload) > > # Unzip NUTS_2010_60M_SH.zip to SpatialPolygonsDataFrame > unzip("NUTS_2010_60M_SH.zip") > > library(rgdal) > # Lecture du fichier > map <- readOGR(dsn = "./NUTS_2010_60M_SH/data", layer = > "NUTS_RG_60M_2010") > # OGR data source with driver: ESRI Shapefile > # Source: "./NUTS_2010_60M_SH/data", layer: "NUTS_RG_60M_2010" > # with 1920 features and 4 fields > # Feature type: wkbPolygon with 2 dimensions > > names(map) > #[1] "NUTS_ID" "STAT_LEVL_" "SHAPE_Leng" "SHAPE_Area" > > # as the data is at NUTS2-level, we subset the > # spatialpolygondataframe > map_nuts2 <- subset(map, STAT_LEVL_ <= 2) > > # dim show how many regions are in the spatialpolygondataframe > dim(map_nuts2) > ## [1] 467 4 > > # dim show how many regions are in the data.frame > dim(df) > ## [1] 43 15 > > ################################################################ > # Spatial dataframe has 467 rows and attribute data 43. We need to make > # attribute data to have similar number of rows > NUTS_ID <- as.character(map_nuts2$NUTS_ID) > VarX <- rep("empty", 467) > dat <- data.frame(NUTS_ID, VarX) > > # then we shall merge this with Eurostat data.frame > dat2 <- merge(dat, df, by.x = "NUTS_ID", by.y = "geo.time", all.x = TRUE) > > ## merge this manipulated attribute data with the spatialpolygondataframe > ## there are still duplicates in the data, remove them > dat2$dup <- duplicated(dat2$NUTS_ID) > dat3 <- subset(dat2, dup == FALSE) > > ## rownames > row.names(dat3) <- dat3$NUTS_ID > row.names(map_nuts2) <- as.character(map_nuts2$NUTS_ID) > > ## order data > dat3 <- dat3[order(row.names(dat3)), ] > map_nuts2 <- map_nuts2[order(row.names(map_nuts2)), ] > > ## join > library(maptools) > shape <- spCbind(map_nuts2, dat3) > > ########################################################################## > > # Munging the shapefile into data.frame and ready for ggplot-plotting > > ## fortify spatialpolygondataframe into data.frame > library(ggplot2) > library(rgeos) > shape$id <- rownames(shape at data) > map.points <- fortify(shape, region = "id") > map.df <- merge(map.points, shape, by = "id") > > ############################################################### > # Only year 2011 > # As we want to plot map faceted by years from 2003 to 2011 we have to > # melt it into long format > map.df <- map.df[, -c(15:23,25,26)] > library(reshape2) > > map.df.l <- melt(data = map.df, id.vars = c("id", "long", "lat", > "group"), measure.vars = c("X2011")) > > # year variable (variable) is class string and type X20xx. Lets remove > # the X and convert it to numerical > > library(stringr) > map.df.l$variable <- str_replace_all(map.df.l$variable, "X", "") > map.df.l$variable <- factor(map.df.l$variable) > map.df.l$variable <- > as.numeric(levels(map.df.l$variable))[map.df.l$variable] > > # And finally the plot using ggplot2 > #Map shows proportion of materially deprived households at the NUTS2 > level. > #Grey color indicates missing data. > > > library(ggplot2) > > ggplot(map.df.l, aes(long,lat,group=group)) + > geom_polygon(aes(fill = value)) + > geom_polygon(data = map.df.l, aes(long,lat), > fill=NA, color = "white", > size=0.1) + # white borders > coord_map(project="orthographic", xlim=c(-22,34), ylim=c(35,70)) + # proj > labs(title = "Cartographie") + > theme_minimal() >-- Jon Olav Sk?ien Joint Research Centre - European Commission Institute for Environment and Sustainability (IES) Land Resource Management Unit Via Fermi 2749, TP 440, I-21027 Ispra (VA), ITALY jon.skoien at jrc.ec.europa.eu Tel: +39 0332 789205 Disclaimer: Views expressed in this email are those of the individual and do not necessarily represent official views of the European Commission.