cavedave · July 10, 2022 13:40 · cavedave · Oct 1, 2019 · cavedave · Oct 2, 2019
diff --git a/age15to64.r b/age15to64.r

 library(eurostat)      # eurostat data
 library(rnaturalearth) # worldwide map data
 library(tidyverse)     # tidy data transformation
 library(lubridate)     # date and time support
 library(sf)            # simple features GIS
 library(RColorBrewer)

 euro_pop <-
  get_eurostat('demo_r_pjanaggr3', stringsAsFactors = FALSE) %>%
  filter(year(time) %in% c(2016, 2017),
         str_length(geo) == 5, # NUTS-3
         age == 'Y15-64'
        )
         
 # download geospatial data for NUTS-3 regions
 euro_nuts3_sf <-
  get_eurostat_geospatial(output_class = 'sf',
                          resolution = '60', nuts_level = 3) %>%
  st_transform(crs = 3035)
  
  # download geospatial data for European and Asian countries
 eura <-
  ne_countries(continent = c('europe', 'asia'), returnclass = 'sf') %>%
  st_transform(crs = 3035)
  
 # calculate difference in absolute population numbers of male and female
 euro_sex_diff <-
  euro_pop %>%
  filter(sex %in% c('M', 'F')) %>%
  spread(sex, values) %>%
  mutate(sex_diff = ((`F` - `M`)/(`F` + `M`))*100) %>%
  drop_na()


 #find most skewed area 
 #euro_sex_diff[which.max(euro_sex_diff$sex_diff),]

 # choropleth-map

 breaks = c(-Inf,-10,-5, -3,-2, -1,0,1, 2, 3, 5,10, Inf)#7
 labels = c('10%+ more males ',
           '5-10% more males', '3-5% more males', '2% more males','1% more males','equal','1% more females ','2% more females ',
           '2-3% more females', '3-5% more females',
           '5-10% more females','10%+ more females')#6 one less

 cols <-rev(brewer.pal(11,"RdBu"))

 # then remove the first darkest blue that you don't need
 #palette = 'RdBu'
 cols <- cols[2:11]

 plot_choropleth <-
  euro_nuts3_sf %>%
  left_join(y = euro_sex_diff, by = c('id' = 'geo')) %>%
  ggplot() +
  geom_sf(data = eura, color = 'white', fill = 'grey95') +
  geom_sf(aes(fill = cut(sex_diff, breaks, labels)),
          color = 'white', lwd = 0.1) +
  coord_sf(xlim = c(2.5e6, 7e6), ylim = c(1.35e6, 5.55e6), datum = NA) +
 scale_fill_manual(
 values=c('#001748','#053061', '#2166AC', '#4393C3', '#92C5DE',  '#808080','#FDDBC7', '#F4A582', '#D6604D', '#B2182B', '#67001F','#4E0006'),
    name = "Sex Ratio 2017 age 15-64",
    breaks = labels, # to omit the NA level
    guide = guide_legend(reverse = TRUE)
  ) +

  theme_void() +
  theme(legend.position = c(0.83, 0.7)) +
  labs(caption = 'Data: Eurostat')
  
  ggsave('EUage15-64.png')
diff --git a/sexratio.r b/sexratio.r
 library(eurostat)      # eurostat data
 library(rnaturalearth) # worldwide map data
 library(tidyverse)     # tidy data transformation
 library(lubridate)     # date and time support
 library(sf)            # simple features GIS

 euro_pop <-
  get_eurostat('demo_r_pjanaggr3', stringsAsFactors = FALSE) %>%
  filter(
         str_length(geo) == 5, # NUTS-3
         age == 'TOTAL')#TODO I should have a filter in here for date as well
         
 # download geospatial data for NUTS-3 regions
 euro_nuts3_sf <-
  get_eurostat_geospatial(output_class = 'sf',
                          resolution = '60', nuts_level = 3) %>%
  st_transform(crs = 3035)
  
  # download geospatial data for European and Asian countries
 eura <-
  ne_countries(continent = c('europe', 'asia'), returnclass = 'sf') %>%
  st_transform(crs = 3035)
  
 # calculate difference in absolute population numbers of male and female
 euro_sex_diff <-
  euro_pop %>%
  filter(sex %in% c('M', 'F')) %>%
  spread(sex, values) %>%
  mutate(sex_diff = ((`F` - `M`)/(`F` + `M`))*100) %>%
  drop_na()


 #find most skewed area 
 euro_sex_diff[which.max(euro_sex_diff$sex_diff),]

 # choropleth-map
 breaks = c(-16, -5,-2, -1,0,1, 2, 3, 5,8, Inf)
 bels = c('5%+ more males ',
           '2 to 5% more males', '2% more males', '1% more males','equal','1% more females ',
           '2 to 3% more females', '3 to 5% more females',
           '5-8% extra females','8%+ extra females')#one less than breaks as this is the space between the breaks
 plot_choropleth <-
  euro_nuts3_sf %>%
  left_join(y = euro_sex_diff, by = c('id' = 'geo')) %>%
  ggplot() +
  geom_sf(data = eura, color = 'white', fill = 'grey95') +
  geom_sf(aes(fill = cut(sex_diff, breaks, labels)),
          color = 'white', lwd = 0.1) +
  coord_sf(xlim = c(2.5e6, 7e6), ylim = c(1.35e6, 5.55e6), datum = NA) +
  #scale_fill_brewer(name = 'Sex Ratio 2017',
  #                  type = 'div', palette = 'RdBu',direction=-1,#5,
  #                  breaks = labels, # to omit the NA level
  #                  guide = guide_legend(reverse = TRUE)) +
 #improvement from https://gist.github.com/kiernann/d7173f18f81ce753c95290f8ca1186e0
  scale_fill_manual(
    values = rev(RColorBrewer::brewer.pal(n = 11, name = "RdBu")[1:10]),
    name = "Sex Ratio 2017",
    breaks = labels, # to omit the NA level
    guide = guide_legend(reverse = TRUE)
  ) +
  theme_void() +
  theme(legend.position = c(0.83, 0.7)) +
  labs(caption = 'Data: Eurostat')
  
  ggsave('EUGender2.png', width=10, height=10)

	library(eurostat) # eurostat data
	library(rnaturalearth) # worldwide map data
	library(tidyverse) # tidy data transformation
	library(lubridate) # date and time support
	library(sf) # simple features GIS
	library(RColorBrewer)

	euro_pop <-
	get_eurostat('demo_r_pjanaggr3', stringsAsFactors = FALSE) %>%
	filter(year(time) %in% c(2016, 2017),
	str_length(geo) == 5, # NUTS-3
	age == 'Y15-64'
	)

	# download geospatial data for NUTS-3 regions
	euro_nuts3_sf <-
	get_eurostat_geospatial(output_class = 'sf',
	resolution = '60', nuts_level = 3) %>%
	st_transform(crs = 3035)

	# download geospatial data for European and Asian countries
	eura <-
	ne_countries(continent = c('europe', 'asia'), returnclass = 'sf') %>%
	st_transform(crs = 3035)

	# calculate difference in absolute population numbers of male and female
	euro_sex_diff <-
	euro_pop %>%
	filter(sex %in% c('M', 'F')) %>%
	spread(sex, values) %>%
	mutate(sex_diff = ((`F` - `M`)/(`F` + `M`))*100) %>%
	drop_na()


	#find most skewed area
	#euro_sex_diff[which.max(euro_sex_diff$sex_diff),]

	# choropleth-map

	breaks = c(-Inf,-10,-5, -3,-2, -1,0,1, 2, 3, 5,10, Inf)#7
	labels = c('10%+ more males ',
	'5-10% more males', '3-5% more males', '2% more males','1% more males','equal','1% more females ','2% more females ',
	'2-3% more females', '3-5% more females',
	'5-10% more females','10%+ more females')#6 one less

	cols <-rev(brewer.pal(11,"RdBu"))

	# then remove the first darkest blue that you don't need
	#palette = 'RdBu'
	cols <- cols[2:11]

	plot_choropleth <-
	euro_nuts3_sf %>%
	left_join(y = euro_sex_diff, by = c('id' = 'geo')) %>%
	ggplot() +
	geom_sf(data = eura, color = 'white', fill = 'grey95') +
	geom_sf(aes(fill = cut(sex_diff, breaks, labels)),
	color = 'white', lwd = 0.1) +
	coord_sf(xlim = c(2.5e6, 7e6), ylim = c(1.35e6, 5.55e6), datum = NA) +
	scale_fill_manual(
	values=c('#001748','#053061', '#2166AC', '#4393C3', '#92C5DE', '#808080','#FDDBC7', '#F4A582', '#D6604D', '#B2182B', '#67001F','#4E0006'),
	name = "Sex Ratio 2017 age 15-64",
	breaks = labels, # to omit the NA level
	guide = guide_legend(reverse = TRUE)
	) +

	theme_void() +
	theme(legend.position = c(0.83, 0.7)) +
	labs(caption = 'Data: Eurostat')

	ggsave('EUage15-64.png')