Created
January 15, 2018 10:47
-
-
Save jschoeley/a7f71813b7d68d79254c6e7517564f6b to your computer and use it in GitHub Desktop.
Tricolore: Experimenting with different legend styles for centered ternary color scales
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' --- | |
#' title: Different legend styles for centered ternary color scales | |
#' author: Jonas Schöley, Ilya Kashnitsky | |
#' output: | |
#' pdf_document | |
#' --- | |
#+echo=FALSE | |
knitr::opts_chunk$set(warning=FALSE, message=FALSE, | |
fig.width = 15, fig.height = 15) | |
# Ilyas code -- preparing the map ----------------------------------------- | |
# ikashnitsky.github.io 2017-06-30 | |
# load required packages | |
library(tidyverse) # data manipulation and viz | |
library(lubridate) # easy manipulations with dates | |
library(forcats) # good for dealing with factors | |
library(stringr) # good for dealing with text strings | |
library(eurostat) # grab data | |
library(rgdal) # deal with shapefiles | |
library(rgeos) | |
library(maptools) | |
# Download geodata | |
# Eurostat official shapefiles for regions | |
# http://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/administrative-units-statistical-units | |
# geodata will be stored in a directory "geodata" | |
ifelse(!dir.exists('geodata'), | |
dir.create('geodata'), | |
paste("Directory already exists")) | |
f <- tempfile() | |
download.file("http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/NUTS_2013_20M_SH.zip", destfile = f) | |
unzip(f, exdir = "geodata/.") | |
NUTS_raw <- readOGR("geodata/NUTS_2013_20M_SH/data/.", "NUTS_RG_20M_2013") | |
# colnames to lower case | |
names(NUTS_raw@data) <- tolower(names(NUTS_raw@data)) | |
# change coordinate system to LAEA Europe (EPSG:3035) | |
# check out https://epsg.io | |
epsg3035 <- "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs" | |
NUTS <- spTransform(NUTS_raw, CRS(epsg3035)) | |
# create borders between countries | |
NUTS0 <- NUTS[NUTS$stat_levl_==0,] | |
identify_borders <- function(SPolyDF){ | |
borders <- gDifference( | |
as(SPolyDF,"SpatialLines"), | |
as(gUnaryUnion(SPolyDF),"SpatialLines"), | |
byid=TRUE) | |
df <- data.frame(len = sapply(1:length(borders), | |
function(i) gLength(borders[i, ]))) | |
rownames(df) <- sapply(1:length(borders), | |
function(i) borders@lines[[i]]@ID) | |
SLDF <- SpatialLinesDataFrame(borders, data = df) | |
return(SLDF) | |
} | |
Sborders <- identify_borders(NUTS0) | |
bord <- fortify(Sborders) | |
# subset NUTS-3 regions | |
NUTS3 <- NUTS[NUTS$stat_levl_==3,] | |
# remote areas to remove (NUTS-2) | |
remote <- c(paste0('ES',c(63,64,70)),paste('FRA',1:5,sep=''),'PT20','PT30') | |
# make the geodata ready for ggplot | |
gd3 <- fortify(NUTS3, region = "nuts_id") %>% | |
filter(!str_sub(id, 1, 4) %in% remote, | |
!str_sub(id, 1, 2) == "AL") | |
# let's add neighbouring countries | |
f <- tempfile() | |
download.file("http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/CNTR_2010_20M_SH.zip", destfile = f) | |
unzip(f, exdir = "geodata/.") | |
WORLD <- readOGR("geodata/CNTR_2010_20M_SH/CNTR_2010_20M_SH/Data/.", | |
"CNTR_RG_20M_2010") | |
# colnames to lower case | |
names(WORLD@data) <- tolower(names(WORLD@data)) | |
# filter only Europe and the neighbouring countries | |
neigh_subset <- c("AT", "BE", "BG", "CH", "CZ", "DE", "DK", | |
"EE", "EL", "ES", "FI", "FR", "HU", "IE", "IS", "IT", "LT", "LV", | |
"NL", "NO", "PL", "PT", "SE", "SI", "SK", "UK", "IM", "FO", "GI", | |
"LU", "LI", "AD", "MC", "MT", "VA", "SM", "HR", "BA", "ME", "MK", | |
"AL", "RS", "RO", "MD", "UA", "BY", "RU", "TR", "CY", "EG", "LY", | |
"TN", "DZ", "MA", "GG", "JE", "KZ", "AM", "GE", "AZ", "SY", "IQ", | |
"IR", "IL", "JO", "PS", "SA", "LB", "MN", "LY", "EG") | |
NEIGH <- WORLD[WORLD$cntr_id %in% neigh_subset,] | |
# reproject the shapefile to a pretty projection for mapping Europe | |
Sneighbors <- spTransform(NEIGH, CRS(epsg3035)) | |
# cut of everything behing the borders | |
rect <- rgeos::readWKT( | |
"POLYGON((20e5 10e5, | |
80e5 10e5, | |
80e5 60e5, | |
20e5 60e5, | |
20e5 10e5))" | |
) | |
Sneighbors <- rgeos::gIntersection(Sneighbors,rect,byid = T) | |
# create a blank map | |
basemap <- ggplot() + | |
geom_polygon(data = fortify(Sneighbors), | |
aes(x = long, y = lat, group = group), | |
fill = "grey90",color = "grey90") + | |
coord_equal(ylim = c(1350000,5550000), xlim = c(2500000, 7500000)) + | |
theme_void() + | |
theme(panel.border = element_rect(color = "black",size = .5,fill = NA), | |
legend.position = c(1, 1), | |
legend.justification = c(1, 1), | |
legend.background = element_rect(colour = NA, fill = NA), | |
legend.title = element_text(size = 15), | |
legend.text = element_text(size = 15))+ | |
scale_x_continuous(expand = c(0,0)) + | |
scale_y_continuous(expand = c(0,0)) + | |
labs(x = NULL, y = NULL) | |
# Get stat data | |
# Find the needed dataset code | |
# http://ec.europa.eu/eurostat/web/regions/data/database | |
# download the data on broad pop structures at NUTS-3 level | |
df <- get_eurostat("demo_r_pjanaggr3") | |
# if the automated download does not work, the data can be grabbed manually at | |
# http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing | |
# filter NUTS-3, 2015, both sex, calculate shares | |
both15 <- | |
df %>% | |
filter(sex=="T", nchar(paste(geo))==5, | |
!str_sub(geo, 1, 4) %in% remote, | |
!str_sub(geo, 1, 2) %in% c("AL", "MK"), | |
year(time)==2015) %>% | |
droplevels() %>% | |
transmute(id = geo, age, value = values) %>% | |
spread(age, value) | |
# a function to overcome the problem of mapping nested polygons | |
# check out https://stackoverflow.com/questions/21748852 | |
gghole <- function(fort){ | |
poly <- fort[fort$id %in% fort[fort$hole,]$id,] | |
hole <- fort[!fort$id %in% fort[fort$hole,]$id,] | |
out <- list(poly,hole) | |
names(out) <- c('poly','hole') | |
return(out) | |
} | |
# Tricolore: raw proportions ---------------------------------------------- | |
devtools::install_github('jschoeley/[email protected]') | |
library(tricolore) | |
library(ggtern) | |
# color-codes and legend | |
tric <- Tricolore(both15, p1 = 'Y_LT15', p2 = 'Y15-64', p3 = 'Y_GE65', show_center = FALSE) | |
# customize legend | |
tric_legend <- | |
tric$legend + | |
scale_L_continuous('<15') + | |
scale_T_continuous('15-64') + | |
scale_R_continuous('65+') + | |
labs(subtitle = 'Age structure in Europe 2015\nUncentered proportions', | |
caption = 'EU-28 average:\n15.5% <15, 65.4% 15-64, 19.1% 65+') + | |
theme(plot.background = element_rect(color = 'black')) | |
# add color-coded proportions to map | |
both15$rgb <- tric$hexsrgb | |
# plot color-coded map | |
p1 <- | |
basemap + | |
geom_map(map = gghole(gd3)[[1]], data = both15, | |
aes(map_id = id, fill = rgb))+ | |
geom_map(map = gghole(gd3)[[2]], data = both15, | |
aes(map_id = id, fill = rgb))+ | |
scale_fill_identity()+ | |
geom_path(data = bord, aes(long, lat, group = group), | |
color = "white", size = .5)+ | |
theme(legend.position = "none") + | |
annotation_custom(ggplotGrob(tric_legend), | |
xmin = 50e5, xmax = 80e5, ymin = 35e5, ymax = 55e5) | |
p1 | |
# Tricolore -- deviations from EU average --------------------------------- | |
# EU-28 age-structure 2015 | |
center = c(0.155, 0.654, 0.191) | |
# coordinates and labels for the centered gridlines of a ternary diagram | |
TernaryCentroidGrid <- function (center) { | |
# center percent difference labels | |
labels <- seq(-1, 1, 0.1) | |
labels <- data.frame( | |
L = labels[labels >= -center[1]][1:10], | |
T = labels[labels >= -center[2]][1:10], | |
R = labels[labels >= -center[3]][1:10] | |
) | |
# breaks of uncentered grid | |
breaks = data.frame( | |
L = labels$L + center[1], | |
T = labels$T + center[2], | |
R = labels$R + center[3] | |
) | |
list(labels = labels, breaks = breaks) | |
} | |
# color-codes and legend | |
tric <- Tricolore(both15, p1 = 'Y_LT15', p2 = 'Y15-64', p3 = 'Y_GE65', | |
center = center, scale = 3, hue = 0.3) | |
# percent-point difference grid | |
legend_grid <- TernaryCentroidGrid(center) | |
# customize legend | |
tric_legend <- | |
tric$legend + | |
scale_L_continuous('<15', | |
breaks = legend_grid$breaks$L, | |
labels = legend_grid$labels$L) + | |
scale_T_continuous('15-64', | |
breaks = legend_grid$breaks$T, | |
labels = legend_grid$labels$T) + | |
scale_R_continuous('65+', | |
breaks = legend_grid$breaks$R, | |
labels = legend_grid$labels$R) + | |
labs(subtitle = 'Age structure in Europe 2015\nPercent-point diff. from EU avg.', | |
caption = 'EU-28 average:\n15.5% <15, 65.4% 15-64, 19.1% 65+') + | |
theme(plot.background = element_rect(color = 'black')) | |
# add color-coded proportions to map | |
both15$rgb <- tric$hexsrgb | |
# plot color-coded map | |
p2 <- | |
basemap + | |
geom_map(map = gghole(gd3)[[1]], data = both15, | |
aes(map_id = id, fill = rgb))+ | |
geom_map(map = gghole(gd3)[[2]], data = both15, | |
aes(map_id = id, fill = rgb))+ | |
scale_fill_identity()+ | |
geom_path(data = bord, aes(long, lat, group = group), | |
color = "white", size = .5)+ | |
theme(legend.position = "none") + | |
annotation_custom(ggplotGrob(tric_legend), | |
xmin = 50e5, xmax = 80e5, ymin = 35e5, ymax = 55e5) | |
p2 | |
# Tricolore -- zoomed deviations from EU average --------------------------- | |
# Same colors as before, we just zoom into the legend a bit. | |
# Ternary limits spanning a regular triangle around a given center | |
CenterZoomLimits <- function (center = rep(1/3, 3), scale = 1) { | |
rbind( | |
center-scale*1/3*(1-max(center)), | |
center+scale*2/3*(1-max(center)) | |
) | |
} | |
legend_limits <- CenterZoomLimits(center) | |
tric_legend <- | |
tric$legend + | |
scale_L_continuous('<15', | |
breaks = legend_grid$breaks$L, | |
labels = legend_grid$labels$L, | |
limits = legend_limits[,1]) + | |
scale_T_continuous('15-64', | |
breaks = legend_grid$breaks$T, | |
labels = legend_grid$labels$T, | |
limits = legend_limits[,2]) + | |
scale_R_continuous('65+', | |
breaks = legend_grid$breaks$R, | |
labels = legend_grid$labels$R, | |
limits = legend_limits[,3]) + | |
labs(subtitle = 'Age structure in Europe 2015\nPercent point diff. from EU avg.', | |
caption = 'EU-28 average:\n15.5% <15, 65.4% 15-64, 19.1%') + | |
theme(plot.background = element_rect(color = 'black')) | |
# plot color-coded map | |
p3 <- | |
p2 + | |
annotation_custom(ggplotGrob(tric_legend), | |
xmin = 50e5, xmax = 80e5, ymin = 35e5, ymax = 55e5) | |
p3 | |
# Tricolore -- zoomed deviations from EU average --------------------------- | |
# Same colors as before, we just use the original proportion labels on the | |
# zoomed legend. | |
tric_legend <- | |
tric$legend + | |
scale_L_continuous('<15', | |
limits = legend_limits[,1]) + | |
scale_T_continuous('15-64', | |
limits = legend_limits[,2]) + | |
scale_R_continuous('65+', | |
limits = legend_limits[,3]) + | |
labs(subtitle = 'Age structure in Europe 2015\nColors show deviations from EU avg.', | |
caption = 'EU-28 average:\n15.5% <15, 65.4% 15-64, 19.1%') + | |
theme(plot.background = element_rect(color = 'black')) | |
# plot color-coded map | |
p4 <- | |
p3 + annotation_custom(ggplotGrob(tric_legend), | |
xmin = 50e5, xmax = 80e5, ymin = 35e5, ymax = 55e5) | |
p4 | |
grid.arrange(p1, p2, p3, p4, ncol = 2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment