Forked from ikashnitsky/colorcoded-map-population-structures.R
Created
March 26, 2018 19:36
-
-
Save favstats/566b41d8559224f9c83e1450eafe96d6 to your computer and use it in GitHub Desktop.
Code to reproduce the RGB map of the population structure of NUTS-3 regions of Europe -- https://ikashnitsky.github.io/2017/colorcoded-map/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################ | |
# | |
# ikashnitsky.github.io 2017-06-30 | |
# Produce an RGB coded map of pop structures at NUTS-3 level | |
# Ilya Kashnitsky, [email protected] | |
# | |
################################################################################ | |
# load required packages | |
library(tidyverse) # data manipulation and viz | |
library(lubridate) # easy manipulations with dates | |
library(ggthemes) # themes for ggplot2 | |
library(forcats) # good for dealing with factors | |
library(stringr) # good for dealing with text strings | |
library(extrafont) # custom font | |
myfont <- "Roboto Condensed" | |
library(eurostat) # grab data | |
library(rgdal) # deal with shapefiles | |
library(rgeos) | |
library(maptools) | |
################################################################################ | |
# Download geodata | |
# Eurostat official shapefiles for regions | |
# http://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/administrative-units-statistical-units | |
# geodata will be stored in a directory "geodata" | |
ifelse(!dir.exists('geodata'), | |
dir.create('geodata'), | |
paste("Directory already exists")) | |
f <- tempfile() | |
download.file("http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/NUTS_2013_20M_SH.zip", destfile = f) | |
unzip(f, exdir = "geodata/.") | |
NUTS_raw <- readOGR("geodata/NUTS_2013_20M_SH/data/.", "NUTS_RG_20M_2013") | |
# colnames to lower case | |
names(NUTS_raw@data) <- tolower(names(NUTS_raw@data)) | |
NUTS_raw@data %>% View | |
# let's have a look | |
plot(NUTS_raw) | |
# change coordinate system to LAEA Europe (EPSG:3035) | |
# check out https://epsg.io | |
epsg3035 <- "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs" | |
NUTS <- spTransform(NUTS_raw, CRS(epsg3035)) | |
# create borders between countries | |
NUTS0 <- NUTS[NUTS$stat_levl_==0,] | |
identify_borders <- function(SPolyDF){ | |
require(rgeos) | |
require(sp) | |
borders <- gDifference( | |
as(SPolyDF,"SpatialLines"), | |
as(gUnaryUnion(SPolyDF),"SpatialLines"), | |
byid=TRUE) | |
df <- data.frame(len = sapply(1:length(borders), | |
function(i) gLength(borders[i, ]))) | |
rownames(df) <- sapply(1:length(borders), | |
function(i) borders@lines[[i]]@ID) | |
SLDF <- SpatialLinesDataFrame(borders, data = df) | |
return(SLDF) | |
} | |
Sborders <- identify_borders(NUTS0) | |
plot(Sborders) | |
bord <- fortify(Sborders) | |
# subset NUTS-3 regions | |
NUTS3 <- NUTS[NUTS$stat_levl_==3,] | |
plot(NUTS3) | |
NUTS3[str_sub(paste(id), 1, 2) %in% "FR",]@data %>% View | |
# remote areas to remove (NUTS-2) | |
remote <- c(paste0('ES',c(63,64,70)),paste('FRA',1:5,sep=''),'PT20','PT30') | |
# make the geodata ready for ggplot | |
gd3 <- fortify(NUTS3, region = "nuts_id") %>% | |
filter(!str_sub(id, 1, 4) %in% remote, | |
!str_sub(id, 1, 2) == "AL") | |
# plot to check | |
basemap+ | |
geom_polygon(data = gd3, | |
aes(x = long, y = lat, group = group), | |
fill = "black")+ | |
coord_equal() | |
# let's add neighbouring countries | |
f <- tempfile() | |
download.file("http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/CNTR_2010_20M_SH.zip", destfile = f) | |
unzip(f, exdir = "geodata/.") | |
WORLD <- readOGR("geodata/CNTR_2010_20M_SH/CNTR_2010_20M_SH/Data/.", | |
"CNTR_RG_20M_2010") | |
# colnames to lower case | |
names(WORLD@data) <- tolower(names(WORLD@data)) | |
# filter only Europe and the neighbouring countries | |
neigh_subset <- c("AT", "BE", "BG", "CH", "CZ", "DE", "DK", | |
"EE", "EL", "ES", "FI", "FR", "HU", "IE", "IS", "IT", "LT", "LV", | |
"NL", "NO", "PL", "PT", "SE", "SI", "SK", "UK", "IM", "FO", "GI", | |
"LU", "LI", "AD", "MC", "MT", "VA", "SM", "HR", "BA", "ME", "MK", | |
"AL", "RS", "RO", "MD", "UA", "BY", "RU", "TR", "CY", "EG", "LY", | |
"TN", "DZ", "MA", "GG", "JE", "KZ", "AM", "GE", "AZ", "SY", "IQ", | |
"IR", "IL", "JO", "PS", "SA", "LB", "MN", "LY", "EG") | |
NEIGH <- WORLD[WORLD$cntr_id %in% neigh_subset,] | |
# reproject the shapefile to a pretty projection for mapping Europe | |
Sneighbors <- spTransform(NEIGH, CRS(epsg3035)) | |
# cut of everything behing the borders | |
rect <- rgeos::readWKT( | |
"POLYGON((20e5 10e5, | |
80e5 10e5, | |
80e5 60e5, | |
20e5 60e5, | |
20e5 10e5))" | |
) | |
Sneighbors <- rgeos::gIntersection(Sneighbors,rect,byid = T) | |
plot(Sneighbors) | |
# create a blank map | |
basemap <- ggplot()+ | |
geom_polygon(data = fortify(Sneighbors), | |
aes(x = long, y = lat, group = group), | |
fill = "grey90",color = "grey90")+ | |
coord_equal(ylim = c(1350000,5550000), xlim = c(2500000, 7500000))+ | |
theme_map(base_family = myfont)+ | |
theme(panel.border = element_rect(color = "black",size = .5,fill = NA), | |
legend.position = c(1, 1), | |
legend.justification = c(1, 1), | |
legend.background = element_rect(colour = NA, fill = NA), | |
legend.title = element_text(size = 15), | |
legend.text = element_text(size = 15))+ | |
scale_x_continuous(expand = c(0,0)) + | |
scale_y_continuous(expand = c(0,0)) + | |
labs(x = NULL, y = NULL) | |
# save - just to skip the download process next timw | |
save(NUTS3, Sneighbors, Sborders, remote, file = "geodata.RData") | |
################################################################################ | |
# Get stat data | |
# Find the needed dataset code | |
# http://ec.europa.eu/eurostat/web/regions/data/database | |
# download the data on broad pop structures at NUTS-3 level | |
df <- get_eurostat("demo_r_pjanaggr3") | |
# if the automated download does not work, the data can be grabbed manually at | |
# http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing | |
# filter NUTS-3, 2015, both sex, calculate shares | |
both15 <- df %>% filter(sex=="T", nchar(paste(geo))==5, | |
!str_sub(geo, 1, 4) %in% remote, | |
!str_sub(geo, 1, 2) %in% c("AL", "MK"), | |
year(time)==2015) %>% | |
droplevels() %>% | |
transmute(id = geo, age, value = values) %>% | |
spread(age, value) %>% | |
mutate(sy = Y_LT15 / TOTAL, | |
sw = `Y15-64` / TOTAL, | |
so = 1 - (sy + sw), | |
sy_norm = sy %>% scales::rescale(), | |
sw_norm = sw %>% scales::rescale(), | |
so_norm = so %>% scales::rescale(), | |
# color = RGB(so, sw, sy) %>% hex(), | |
# color_norm = RGB(so_norm, sw_norm, sy_norm) %>% hex(), | |
color = rgb(so, sw, sy), | |
color_norm = rgb(so_norm, sw_norm, sy_norm) | |
) | |
# a function to overcome the problem of mapping nested polygons | |
# check out https://stackoverflow.com/questions/21748852 | |
gghole <- function(fort){ | |
poly <- fort[fort$id %in% fort[fort$hole,]$id,] | |
hole <- fort[!fort$id %in% fort[fort$hole,]$id,] | |
out <- list(poly,hole) | |
names(out) <- c('poly','hole') | |
return(out) | |
} | |
################################################################################ | |
# Ready to play! | |
map <- basemap + | |
geom_map(map = gghole(gd3)[[1]], data = both15, | |
aes(map_id = id, fill = color_norm))+ | |
geom_map(map = gghole(gd3)[[2]], data = both15, | |
aes(map_id = id, fill = color_norm))+ | |
scale_fill_identity()+ | |
geom_path(data = bord, aes(long, lat, group = group), | |
color = "white", size = .5)+ | |
theme(legend.position = "none") | |
# add legend | |
library(png) | |
download.file("https://upload.wikimedia.org/wikipedia/commons/8/8e/Barycentric_RGB.png", | |
destfile = "rgb-triangle.png", mode = 'wb') | |
mypng <- readPNG("rgb-triangle.png") | |
legend <- ggplot()+ | |
coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = c(0,0))+ | |
annotate("text", x = .35, y = .15, hjust = 1, vjust = 1, | |
label = "Share of elderly\npopulation (65+)", | |
size = 5.5, family = myfont, color = "grey20")+ | |
annotate("text", x = .65, y = .15, hjust = 0, vjust = 1, | |
label = "Share of young\npopulation (0 - 14)", | |
size = 5.5, family = myfont, color = "grey20")+ | |
annotate("text", x = .5, y = .75, hjust = .5, vjust = 0, | |
label = "Share of population\nat working ages (15 - 64)", | |
size = 5.5, family = myfont, color = "grey20")+ | |
annotate("text", x = .5, y = .99, hjust = .5, vjust = 1, | |
label = toupper("Color-coding scheme"), | |
size = 7, family = myfont, color = "grey20")+ | |
annotation_raster(mypng, | |
xmin = .2, xmax = .8, | |
ymin = .2, ymax = (.2 + .3 * sqrt(3)))+ | |
theme_map() | |
library(gridExtra) | |
#assamble the final plot | |
together <- map + annotation_custom(ggplotGrob(legend), | |
xmin = 54.5e5,xmax = 74e5, | |
ymin = 33e5, ymax = 54.5e5)+ | |
annotate("text", x = 74e5, y = 14e5, hjust = 1, vjust = 0, | |
label = "Ilya Kashnitsky\nikashnitsky.github.io", | |
size = 5, family = myfont, color = "grey20")+ | |
annotate("text", x = 26e5, y = 14e5, hjust = 0, vjust = 0, | |
label = "Data: Eurostat, 2015; Geodata: NUTS 2013", | |
size = 5, family = myfont, color = "grey20") | |
# save | |
ggsave("map-nuts3.png", together, width = 12, height = 10.08, | |
dpi = 600, type = "cairo-png") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment