Last active
August 28, 2019 05:18
-
-
Save chris-prener/cd6b0aa0b2096daf13e545f4264ea6bb to your computer and use it in GitHub Desktop.
Adaptation of Grossenbacher/Zehr Bivariate Tutorial
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bivariate Choropleth Map | |
## this is based on Timo Grossenbacher and Angelo Zehr's tutorial - | |
## https://timogrossenbacher.ch/2019/04/bivariate-maps-with-ggplot2-and-sf/ | |
# dependencies | |
library(dplyr) # data wrangling | |
library(ggplot2) # plotting | |
library(purrr) # iteration | |
library(tidyr) # data wrangling | |
library(cowplot) # plot construction | |
library(sf) # spatial tools | |
library(tidycensus) # demographic data | |
library(tigris) # geometric data | |
# create map theme | |
## I modified Timo and Angelo's theme a bit to suite my taste and design style | |
## I also hard-coded the font name and color values in to the theme | |
theme_map <- function(...) { | |
theme_minimal() + | |
theme( | |
# text defaults | |
text = element_text(family = "sans", color = "#000000"), | |
# remove all axes | |
axis.line = element_blank(), | |
axis.text.x = element_blank(), | |
axis.text.y = element_blank(), | |
axis.ticks = element_blank(), | |
# add a grid that blends into plot background | |
panel.grid.major = element_line(color = "#ebebeb", size = 0.2), | |
panel.grid.minor = element_blank(), | |
# background colors | |
plot.background = element_rect(fill = "#ebebeb", color = NA), | |
panel.background = element_rect(fill = "#ebebeb", color = NA), | |
legend.background = element_rect(fill = "#ebebeb", color = NA), | |
# borders and margins | |
plot.margin = unit(c(.5, .5, .2, .5), "cm"), | |
panel.border = element_blank(), | |
panel.spacing = unit(c(-.1, 0.2, .2, 0.2), "cm"), | |
# titles | |
legend.title = element_text(size = 11), | |
legend.text = element_text(size = 9, hjust = 0, color = "#222222"), | |
plot.title = element_text(size = 24, hjust = 0.5, color = "#222222", face = "bold"), | |
plot.subtitle = element_text(size = 18, hjust = 0.5, color = "#222222", | |
margin = margin(b = -0.1, t = -0.1, l = 2, unit = "cm"), | |
face = "bold", debug = FALSE), | |
# captions | |
plot.caption = element_text(size = 10, hjust = .5, margin = margin(t = 0.2, b = 0, unit = "cm"), | |
color = "#939184"), | |
... | |
) | |
} | |
# download median household income data for St. Louis City and County | |
c(189, 510) %>% | |
map_df(~get_acs(year = 2017, geography = "tract", variables = "B19019_001", state = 29, county = .x)) %>% | |
select(GEOID, estimate) %>% | |
rename(medInc = estimate) -> medianInc | |
# download race data for St. Louis City and County and calculate percent white | |
c(189, 510) %>% | |
map_df(~get_acs(year = 2017, geography = "tract", table = "B02001", state = 29, county = .x, output = "wide")) %>% | |
select(GEOID, B02001_001E, B02001_002E) %>% | |
rename(total = B02001_001E, white = B02001_002E) %>% | |
mutate(pctWhite = white/total*100) -> race | |
# download tract geometry | |
tracts <- tracts(state = 29, class = "sf") %>% | |
filter(COUNTYFP %in% c(189, 510)) %>% | |
select(GEOID) | |
# combine tract data with geometry | |
tracts <- tracts %>% | |
left_join(., race, by = "GEOID") %>% | |
left_join(., medianInc, by = "GEOID") | |
# clean-up enviornment | |
# rm(race, medianInc) | |
# download country geometry | |
counties <- counties(state = 29, class = "sf") %>% | |
filter(COUNTYFP %in% c(189, 510)) %>% | |
select(NAMELSAD) | |
# create three bins for percent white | |
quantiles_white <- tracts %>% | |
pull(pctWhite) %>% | |
quantile(probs = seq(0, 1, length.out = 4)) | |
# create three bins for mean income | |
quantiles_medInc <- tracts %>% | |
pull(medInc) %>% | |
quantile(probs = seq(0, 1, length.out = 4)) | |
# create color scale that encodes two variables | |
# red for percent white and blue for median household income | |
bivariate_color_scale <- tibble( | |
"3 - 3" = "#3F2949", # high % white, high income | |
"2 - 3" = "#435786", | |
"1 - 3" = "#4885C1", # low % white, high income | |
"3 - 2" = "#77324C", | |
"2 - 2" = "#806A8A", # medium % white, medium income | |
"1 - 2" = "#89A1C8", | |
"3 - 1" = "#AE3A4E", # high % white, low income | |
"2 - 1" = "#BC7C8F", | |
"1 - 1" = "#CABED0" # low % white, low income | |
) %>% | |
gather("group", "fill") | |
# cut into groups defined above and join fill | |
tracts <- tracts %>% | |
mutate(pctWhite_quantiles = cut(pctWhite, breaks = quantiles_white, include.lowest = TRUE)) %>% | |
mutate(medInc_quantiles = cut(medInc, breaks = quantiles_medInc, include.lowest = TRUE)) %>% | |
mutate(group = paste(as.numeric(pctWhite_quantiles), "-", as.numeric(medInc_quantiles))) %>% | |
left_join(., bivariate_color_scale, by = "group") | |
# create high median income tract object | |
highTracts <- filter(tracts, medInc >= 150000) %>% | |
select(GEOID) | |
# re-project | |
## !! annotations will not work with decimal degrees, use UTM coordinates instead | |
tracts <- st_transform(tracts, crs = 26915) | |
# create annotations | |
## coordinates for arrows must be specified in UTM coordinates | |
## use https://www.geoplaner.com to find needed coordinate pairs - it is an iterative | |
## process of testing coordinate pairs until the desired cartographic effect is achieved | |
annotations <- tibble( | |
label = c( | |
"Grey areas have\nlow % white and\nlow income", | |
"Violet areas have\nhigh % white and\nhigh income", | |
"Blue areas have\nlow % white and\nhigh income", | |
"The only high\n% white and low\nincome tract", | |
"Gray-violet areas\nhave both moderate\n% white and income", | |
"Red outlined tracts have\na median household income \ngreater than $150,000" | |
), | |
arrow_from = c( | |
"745750, 4291382", # grey | |
"713518, 4262733", # violet | |
"741149, 4307104", # blue | |
"718167, 4297318", # bright red | |
"725925, 4302593", # gray violent | |
"701042, 4289956" # high tracts | |
), | |
arrow_to = c( | |
"739153, 4286241", # grey | |
"711985, 4266273", # violet | |
"737122, 4302125", # blue | |
"721222, 4294786", # bright red | |
"727449, 4298328", # gray violet | |
"702816, 4281282" # high tracts | |
), | |
curvature = c( | |
0.2, # grey | |
-0.2, # violent | |
0.2, # blue | |
0.2, # bright red | |
-0.2, # gray violet | |
0.3 # high tracts | |
), | |
nudge = c( | |
"500,500", # grey | |
"500,500", # violent | |
"500,500", # blue | |
"500,500", # bright red | |
"500,500", # gray violet | |
"500,500" # high tracts | |
), | |
just = c( | |
"0,1", # grey | |
"0,1", # violent | |
"0,1", # blue | |
"1,0", # bright red | |
"1,0", # gray violet | |
"0,1" # high tracts | |
) | |
) %>% | |
separate(arrow_from, into = c("x", "y")) %>% | |
separate(arrow_to, into = c("xend", "yend")) %>% | |
separate(nudge, into = c("nudge_x", "nudge_y"), sep = "\\,") %>% | |
separate(just, into = c("hjust", "vjust"), sep = "\\,") | |
# create initial map object | |
map <- ggplot() + | |
# color municipalities according to their race / income combination, with white boarders | |
geom_sf(data = tracts, aes(fill = fill), color = "white", size = 0.1) + | |
# as the sf object municipality_prod_geo has a column with name "fill" that | |
# contains the literal color as hex code for each municipality, we can use | |
# scale_fill_identity here | |
scale_fill_identity() + | |
# add in high median income tract data that highlight boundaries of those tracts | |
geom_sf(data = highTracts, fill = NA, color = "#ff0000", size = .5) + | |
# overlay county coundaries | |
geom_sf(data = counties, fill = NA, color = "white", size = .75) + | |
# labels | |
labs( | |
title = "Racial and Income Segregation (2017)", | |
subtitle = "St. Louis City and County Census Tracts", | |
y = "", | |
x = "", | |
caption = "Map by Christopher Prener, Ph.D.\nData via the U.S. Census Bureau's 2017 ACS 5-year Estimates \nBreaks for percent white are 41.7% and 84.4%. Breaks for \nmedian household income are $41,250 and $62,408." | |
) + | |
# apply theme created above | |
theme_map() | |
# add annotations one by one by walking over the annotations data frame | |
# this is necessary because we cannot define nudge_x, nudge_y and curvature | |
# in the aes in a data-driven way like as with x and y, for example | |
annotations %>% | |
pwalk(function(...) { | |
# collect all values in the row in a one-rowed data frame | |
current <- tibble(...) | |
# convert all columns from x to vjust to numeric | |
# as pwalk apparently turns everything into a character (why???) | |
current <- current %>% | |
mutate_at(vars(x:vjust), as.numeric) | |
# update the plot object with global assignment | |
map <<- map + | |
# for each annotation, add an arrow | |
geom_curve(data = current, aes(x = x, xend = xend, y = y, yend = yend), | |
# that's the whole point of doing this loop: | |
curvature = current %>% pull(curvature), | |
size = 0.2, | |
arrow = arrow(length = unit(0.005, "npc")) | |
) + | |
# for each annotation, add a label | |
geom_text(data = current, aes(x = x, y = y, label = label, hjust = hjust, vjust = vjust), | |
# that's the whole point of doing this loop: | |
nudge_x = current %>% pull(nudge_x), | |
nudge_y = current %>% pull(nudge_y), | |
# other styles | |
family = "sans", | |
color = "#000000", | |
size = 3.5 | |
) | |
}) | |
# use if annotations extend of map | |
map <- map + coord_sf(clip = 'off') | |
# separate the groups used for plotting | |
bivariate_color_scale <- bivariate_color_scale %>% | |
separate(group, into = c("pctWhite", "medInc"), sep = " - ") %>% | |
mutate(pctWhite = as.integer(pctWhite), | |
medInc = as.integer(medInc)) | |
# draw legend | |
## the legend syntax with the right arrow on the original tutorial did not work for me | |
## the following workaround draws the arrows without using unicode symbols | |
legend <- ggplot() + | |
geom_tile(data = bivariate_color_scale, mapping = aes(x = pctWhite, y = medInc, fill = fill)) + | |
scale_fill_identity() + | |
labs(x = expression(paste("Higher % White ", ""%->%"")), | |
y = expression(paste("Higher Income ", ""%->%""))) + | |
theme_map() + | |
# make font small enough | |
theme(axis.title = element_text(size = 12)) + | |
# quadratic tiles | |
coord_fixed() | |
# combine map with legend | |
finalPlot <- ggdraw() + | |
draw_plot(map, 0, 0, 1, 1) + | |
draw_plot(legend, 0.18, .7, 0.2, 0.2) | |
# save plot | |
## !! replace with ggsave - the prener package is a set of personal functions for Chris | |
## ggsave(filename = "plot.png", x, dpi = 1000) | |
prener::cp_plotSave(filename = "plot.png", finalPlot, preset = "lg") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment