Skip to content

Instantly share code, notes, and snippets.

@cavedave
Last active July 10, 2022 13:40
Show Gist options
  • Save cavedave/eeb7b7110d029fec0f158305f24ece57 to your computer and use it in GitHub Desktop.
Save cavedave/eeb7b7110d029fec0f158305f24ece57 to your computer and use it in GitHub Desktop.
Sex Ratio in Europe based on code from https://jschoeley.github.io/2018/07/03/bubble-grid_vs_choropleth.html Norway has a missing spot in future combine Nord Trøndelag and Sort Trønderlag to fill this out.
library(eurostat) # eurostat data
library(rnaturalearth) # worldwide map data
library(tidyverse) # tidy data transformation
library(lubridate) # date and time support
library(sf) # simple features GIS
library(RColorBrewer)
euro_pop <-
get_eurostat('demo_r_pjanaggr3', stringsAsFactors = FALSE) %>%
filter(year(time) %in% c(2016, 2017),
str_length(geo) == 5, # NUTS-3
age == 'Y15-64'
)
# download geospatial data for NUTS-3 regions
euro_nuts3_sf <-
get_eurostat_geospatial(output_class = 'sf',
resolution = '60', nuts_level = 3) %>%
st_transform(crs = 3035)
# download geospatial data for European and Asian countries
eura <-
ne_countries(continent = c('europe', 'asia'), returnclass = 'sf') %>%
st_transform(crs = 3035)
# calculate difference in absolute population numbers of male and female
euro_sex_diff <-
euro_pop %>%
filter(sex %in% c('M', 'F')) %>%
spread(sex, values) %>%
mutate(sex_diff = ((`F` - `M`)/(`F` + `M`))*100) %>%
drop_na()
#find most skewed area
#euro_sex_diff[which.max(euro_sex_diff$sex_diff),]
# choropleth-map
breaks = c(-Inf,-10,-5, -3,-2, -1,0,1, 2, 3, 5,10, Inf)#7
labels = c('10%+ more males ',
'5-10% more males', '3-5% more males', '2% more males','1% more males','equal','1% more females ','2% more females ',
'2-3% more females', '3-5% more females',
'5-10% more females','10%+ more females')#6 one less
cols <-rev(brewer.pal(11,"RdBu"))
# then remove the first darkest blue that you don't need
#palette = 'RdBu'
cols <- cols[2:11]
plot_choropleth <-
euro_nuts3_sf %>%
left_join(y = euro_sex_diff, by = c('id' = 'geo')) %>%
ggplot() +
geom_sf(data = eura, color = 'white', fill = 'grey95') +
geom_sf(aes(fill = cut(sex_diff, breaks, labels)),
color = 'white', lwd = 0.1) +
coord_sf(xlim = c(2.5e6, 7e6), ylim = c(1.35e6, 5.55e6), datum = NA) +
scale_fill_manual(
values=c('#001748','#053061', '#2166AC', '#4393C3', '#92C5DE', '#808080','#FDDBC7', '#F4A582', '#D6604D', '#B2182B', '#67001F','#4E0006'),
name = "Sex Ratio 2017 age 15-64",
breaks = labels, # to omit the NA level
guide = guide_legend(reverse = TRUE)
) +
theme_void() +
theme(legend.position = c(0.83, 0.7)) +
labs(caption = 'Data: Eurostat')
ggsave('EUage15-64.png')
library(eurostat) # eurostat data
library(rnaturalearth) # worldwide map data
library(tidyverse) # tidy data transformation
library(lubridate) # date and time support
library(sf) # simple features GIS
euro_pop <-
get_eurostat('demo_r_pjanaggr3', stringsAsFactors = FALSE) %>%
filter(
str_length(geo) == 5, # NUTS-3
age == 'TOTAL')#TODO I should have a filter in here for date as well
# download geospatial data for NUTS-3 regions
euro_nuts3_sf <-
get_eurostat_geospatial(output_class = 'sf',
resolution = '60', nuts_level = 3) %>%
st_transform(crs = 3035)
# download geospatial data for European and Asian countries
eura <-
ne_countries(continent = c('europe', 'asia'), returnclass = 'sf') %>%
st_transform(crs = 3035)
# calculate difference in absolute population numbers of male and female
euro_sex_diff <-
euro_pop %>%
filter(sex %in% c('M', 'F')) %>%
spread(sex, values) %>%
mutate(sex_diff = ((`F` - `M`)/(`F` + `M`))*100) %>%
drop_na()
#find most skewed area
euro_sex_diff[which.max(euro_sex_diff$sex_diff),]
# choropleth-map
breaks = c(-16, -5,-2, -1,0,1, 2, 3, 5,8, Inf)
bels = c('5%+ more males ',
'2 to 5% more males', '2% more males', '1% more males','equal','1% more females ',
'2 to 3% more females', '3 to 5% more females',
'5-8% extra females','8%+ extra females')#one less than breaks as this is the space between the breaks
plot_choropleth <-
euro_nuts3_sf %>%
left_join(y = euro_sex_diff, by = c('id' = 'geo')) %>%
ggplot() +
geom_sf(data = eura, color = 'white', fill = 'grey95') +
geom_sf(aes(fill = cut(sex_diff, breaks, labels)),
color = 'white', lwd = 0.1) +
coord_sf(xlim = c(2.5e6, 7e6), ylim = c(1.35e6, 5.55e6), datum = NA) +
#scale_fill_brewer(name = 'Sex Ratio 2017',
# type = 'div', palette = 'RdBu',direction=-1,#5,
# breaks = labels, # to omit the NA level
# guide = guide_legend(reverse = TRUE)) +
#improvement from https://gist.github.com/kiernann/d7173f18f81ce753c95290f8ca1186e0
scale_fill_manual(
values = rev(RColorBrewer::brewer.pal(n = 11, name = "RdBu")[1:10]),
name = "Sex Ratio 2017",
breaks = labels, # to omit the NA level
guide = guide_legend(reverse = TRUE)
) +
theme_void() +
theme(legend.position = c(0.83, 0.7)) +
labs(caption = 'Data: Eurostat')
ggsave('EUGender2.png', width=10, height=10)
@cavedave
Copy link
Author

cavedave commented Oct 3, 2019

EUGendery2r2

@cavedave
Copy link
Author

cavedave commented Oct 6, 2019

Issue with Norway grey region is Nord Trøndelag and Sør Trøndelag
have become Trøndelag county
#NO061 and NO062 have become NO060
#get values out and put them in by hand
Nor2<-subset(euro_pop, grepl("NO06", geo))
Nor2
#looks like

unit sex age geo time values
NR F Y15-64 NO061 2017-01-01 101957
NR F Y15-64 NO062 2017-01-01 41910

#use those figures to make your new row. Here these are M/F numbers
de<-data.frame("NR","F","Y15-64","NO060","2017-01-01",143867)
df<-data.frame("NR","M","Y15-64","NO060","2017-01-01",153519)

names(de)<-c("unit","sex","age","geo","time","values")
names(df)<-c("unit","sex","age","geo","time","values")

euro_pop <- rbind(euro_pop, de)
euro_pop <- rbind(euro_pop, df)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment