Arnaud Michel
2014-Feb-06 13:21 UTC
[R] To map population of European countries from Eurostat
Hello
I would like to map the population of the European countries in 2011.
I am using the spatial shapefiles of Europe published by EUROSTAT.
I applyed the script below of Markus Kainubut but I had a problem with
the map.
Any ideas ?
Thanks for your help
#########################################
# Importing the data
library(SmarterPoland)
df <- getEurostatRaw(kod = "tps00001")
# rename 1ere variable
names(df) <- c("xx", 2002:2013)
# new variable df$unit et df$geo.time
df$unit <- lapply(strsplit(as.character(df$xx), ","),
"[", 1)
df$geo.time <- lapply(strsplit(as.character(df$xx), ","),
"[", 2)
# file df.l
df.l <- melt(data = df, id.vars = "geo.time", measure.vars =
c("2002",
"2003",
"2004", "2005", "2006", "2007",
"2008", "2009", "2010", "2011",
"2012",
"2013"))
df.l$geo.time <- unlist(df.l$geo.time) # unlist the geo.time variable
# Load the shapefile GISCO, subset it to NUTS2-level
# and merge the Eurostat
# attribute data with it into single SpatialPolygonDataFrame
Filetodownload <-
"http://epp.eurostat.ec.europa.eu/cache/GISCO/geodatafiles/NUTS_2010_60M_SH.zip"
download.file(Filetodownload, destfile = "NUTS_2010_60M_SH.zip")
rm(Filetodownload)
# Unzip NUTS_2010_60M_SH.zip to SpatialPolygonsDataFrame
unzip("NUTS_2010_60M_SH.zip")
library(rgdal)
# Lecture du fichier
map <- readOGR(dsn = "./NUTS_2010_60M_SH/data", layer =
"NUTS_RG_60M_2010")
# OGR data source with driver: ESRI Shapefile
# Source: "./NUTS_2010_60M_SH/data", layer:
"NUTS_RG_60M_2010"
# with 1920 features and 4 fields
# Feature type: wkbPolygon with 2 dimensions
names(map)
#[1] "NUTS_ID" "STAT_LEVL_" "SHAPE_Leng"
"SHAPE_Area"
# as the data is at NUTS2-level, we subset the
# spatialpolygondataframe
map_nuts2 <- subset(map, STAT_LEVL_ <= 2)
# dim show how many regions are in the spatialpolygondataframe
dim(map_nuts2)
## [1] 467 4
# dim show how many regions are in the data.frame
dim(df)
## [1] 43 15
################################################################
# Spatial dataframe has 467 rows and attribute data 43. We need to make
# attribute data to have similar number of rows
NUTS_ID <- as.character(map_nuts2$NUTS_ID)
VarX <- rep("empty", 467)
dat <- data.frame(NUTS_ID, VarX)
# then we shall merge this with Eurostat data.frame
dat2 <- merge(dat, df, by.x = "NUTS_ID", by.y =
"geo.time", all.x = TRUE)
## merge this manipulated attribute data with the spatialpolygondataframe
## there are still duplicates in the data, remove them
dat2$dup <- duplicated(dat2$NUTS_ID)
dat3 <- subset(dat2, dup == FALSE)
## rownames
row.names(dat3) <- dat3$NUTS_ID
row.names(map_nuts2) <- as.character(map_nuts2$NUTS_ID)
## order data
dat3 <- dat3[order(row.names(dat3)), ]
map_nuts2 <- map_nuts2[order(row.names(map_nuts2)), ]
## join
library(maptools)
shape <- spCbind(map_nuts2, dat3)
##########################################################################
# Munging the shapefile into data.frame and ready for ggplot-plotting
## fortify spatialpolygondataframe into data.frame
library(ggplot2)
library(rgeos)
shape$id <- rownames(shape at data)
map.points <- fortify(shape, region = "id")
map.df <- merge(map.points, shape, by = "id")
###############################################################
# Only year 2011
# As we want to plot map faceted by years from 2003 to 2011 we have to
# melt it into long format
map.df <- map.df[, -c(15:23,25,26)]
library(reshape2)
map.df.l <- melt(data = map.df, id.vars = c("id", "long",
"lat",
"group"), measure.vars = c("X2011"))
# year variable (variable) is class string and type X20xx. Lets remove
# the X and convert it to numerical
library(stringr)
map.df.l$variable <- str_replace_all(map.df.l$variable, "X",
"")
map.df.l$variable <- factor(map.df.l$variable)
map.df.l$variable <-
as.numeric(levels(map.df.l$variable))[map.df.l$variable]
# And finally the plot using ggplot2
#Map shows proportion of materially deprived households at the NUTS2 level.
#Grey color indicates missing data.
library(ggplot2)
ggplot(map.df.l, aes(long,lat,group=group)) +
geom_polygon(aes(fill = value)) +
geom_polygon(data = map.df.l, aes(long,lat),
fill=NA, color = "white",
size=0.1) + # white borders
coord_map(project="orthographic", xlim=c(-22,34), ylim=c(35,70)) + #
proj
labs(title = "Cartographie") +
theme_minimal()
--
Michel ARNAUD
Charg? de mission aupr?s du DRH
DGDRD-Drh - TA 174/04
Av Agropolis 34398 Montpellier cedex 5
tel : 04.67.61.75.38
fax : 04.67.61.57.87
port: 06.47.43.55.31
Jon Olav Skoien
2014-Feb-07 09:38 UTC
[R] To map population of European countries from Eurostat
Michel, I am no expert on ggplot2 and cannot explain why, but it seems the NAs in map.df.l$value cause some problems. Try to remove them before plotting or examine your code to figure out why you get them. They seem to be on the borders between NUTS2-level objects. Best wishes, Jon On 06-Feb-14 14:21, Arnaud Michel wrote:> Hello > I would like to map the population of the European countries in 2011. > I am using the spatial shapefiles of Europe published by EUROSTAT. > I applyed the script below of Markus Kainubut but I had a problem with > the map. > > Any ideas ? > Thanks for your help > > ######################################### > # Importing the data > library(SmarterPoland) > df <- getEurostatRaw(kod = "tps00001") > > # rename 1ere variable > names(df) <- c("xx", 2002:2013) > > # new variable df$unit et df$geo.time > df$unit <- lapply(strsplit(as.character(df$xx), ","), "[", 1) > df$geo.time <- lapply(strsplit(as.character(df$xx), ","), "[", 2) > > # file df.l > df.l <- melt(data = df, id.vars = "geo.time", measure.vars = c("2002", > "2003", > "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", > "2012", "2013")) > > df.l$geo.time <- unlist(df.l$geo.time) # unlist the geo.time variable > > # Load the shapefile GISCO, subset it to NUTS2-level > # and merge the Eurostat > # attribute data with it into single SpatialPolygonDataFrame > > Filetodownload <- > "http://epp.eurostat.ec.europa.eu/cache/GISCO/geodatafiles/NUTS_2010_60M_SH.zip" > download.file(Filetodownload, destfile = "NUTS_2010_60M_SH.zip") > rm(Filetodownload) > > # Unzip NUTS_2010_60M_SH.zip to SpatialPolygonsDataFrame > unzip("NUTS_2010_60M_SH.zip") > > library(rgdal) > # Lecture du fichier > map <- readOGR(dsn = "./NUTS_2010_60M_SH/data", layer = > "NUTS_RG_60M_2010") > # OGR data source with driver: ESRI Shapefile > # Source: "./NUTS_2010_60M_SH/data", layer: "NUTS_RG_60M_2010" > # with 1920 features and 4 fields > # Feature type: wkbPolygon with 2 dimensions > > names(map) > #[1] "NUTS_ID" "STAT_LEVL_" "SHAPE_Leng" "SHAPE_Area" > > # as the data is at NUTS2-level, we subset the > # spatialpolygondataframe > map_nuts2 <- subset(map, STAT_LEVL_ <= 2) > > # dim show how many regions are in the spatialpolygondataframe > dim(map_nuts2) > ## [1] 467 4 > > # dim show how many regions are in the data.frame > dim(df) > ## [1] 43 15 > > ################################################################ > # Spatial dataframe has 467 rows and attribute data 43. We need to make > # attribute data to have similar number of rows > NUTS_ID <- as.character(map_nuts2$NUTS_ID) > VarX <- rep("empty", 467) > dat <- data.frame(NUTS_ID, VarX) > > # then we shall merge this with Eurostat data.frame > dat2 <- merge(dat, df, by.x = "NUTS_ID", by.y = "geo.time", all.x = TRUE) > > ## merge this manipulated attribute data with the spatialpolygondataframe > ## there are still duplicates in the data, remove them > dat2$dup <- duplicated(dat2$NUTS_ID) > dat3 <- subset(dat2, dup == FALSE) > > ## rownames > row.names(dat3) <- dat3$NUTS_ID > row.names(map_nuts2) <- as.character(map_nuts2$NUTS_ID) > > ## order data > dat3 <- dat3[order(row.names(dat3)), ] > map_nuts2 <- map_nuts2[order(row.names(map_nuts2)), ] > > ## join > library(maptools) > shape <- spCbind(map_nuts2, dat3) > > ########################################################################## > > # Munging the shapefile into data.frame and ready for ggplot-plotting > > ## fortify spatialpolygondataframe into data.frame > library(ggplot2) > library(rgeos) > shape$id <- rownames(shape at data) > map.points <- fortify(shape, region = "id") > map.df <- merge(map.points, shape, by = "id") > > ############################################################### > # Only year 2011 > # As we want to plot map faceted by years from 2003 to 2011 we have to > # melt it into long format > map.df <- map.df[, -c(15:23,25,26)] > library(reshape2) > > map.df.l <- melt(data = map.df, id.vars = c("id", "long", "lat", > "group"), measure.vars = c("X2011")) > > # year variable (variable) is class string and type X20xx. Lets remove > # the X and convert it to numerical > > library(stringr) > map.df.l$variable <- str_replace_all(map.df.l$variable, "X", "") > map.df.l$variable <- factor(map.df.l$variable) > map.df.l$variable <- > as.numeric(levels(map.df.l$variable))[map.df.l$variable] > > # And finally the plot using ggplot2 > #Map shows proportion of materially deprived households at the NUTS2 > level. > #Grey color indicates missing data. > > > library(ggplot2) > > ggplot(map.df.l, aes(long,lat,group=group)) + > geom_polygon(aes(fill = value)) + > geom_polygon(data = map.df.l, aes(long,lat), > fill=NA, color = "white", > size=0.1) + # white borders > coord_map(project="orthographic", xlim=c(-22,34), ylim=c(35,70)) + # proj > labs(title = "Cartographie") + > theme_minimal() >-- Jon Olav Sk?ien Joint Research Centre - European Commission Institute for Environment and Sustainability (IES) Land Resource Management Unit Via Fermi 2749, TP 440, I-21027 Ispra (VA), ITALY jon.skoien at jrc.ec.europa.eu Tel: +39 0332 789205 Disclaimer: Views expressed in this email are those of the individual and do not necessarily represent official views of the European Commission.