Created
June 7, 2016 15:14
-
-
Save expersso/64beb9c70487d23adc5b73cc018f6094 to your computer and use it in GitHub Desktop.
Income and demographics in U.S. metropolitan areas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
read_sheet <- function(file, sheet) { | |
readxl::read_excel(file, sheet, skip = 6) %>% clean_data() | |
} | |
fix_names <- function(df) { | |
names(df)[1] <- "location" | |
names(df)[-1] <- paste(rep(c(2000, 2014), each = length(names(df)[-1]) / 2), | |
names(df)[-1], sep = "_") | |
df | |
} | |
clean_data <- . %>% | |
discard(~all(is.na(.x))) %>% | |
fix_names() %>% | |
gather(key = key, value = value, -location) %>% | |
separate(key, c("year", "variable"), "_") %>% | |
mutate(variable = gsub("\\s+", " ", variable), | |
variable = gsub("\\(.*\\)", "", variable)) %>% | |
filter(!is.na(location)) | |
url <- paste0("http://www.pewsocialtrends.org/files/2016/05/", | |
"Middle-Class-U.S.-Metro-Areas-5-12-16-Supplementary-Tables.xlsx") | |
tmp <- tempfile(fileext = ".xlsx") | |
download.file(url, tmp, mode = "wb") | |
df <- c("6. Demographics, metro", "3. Median HH income, metro") %>% | |
map(read_sheet, file = tmp) %>% | |
set_names(c("demo", "inc")) | |
inc <- df$inc %>% | |
spread(year, value) %>% | |
mutate(inc = (`2014` - `2000`) / `2000`) %>% | |
select(location, variable, inc) | |
demo <- df$demo %>% | |
spread(year, value) %>% | |
mutate(demo = (`2014` - `2000`)) %>% | |
select(location, variable, demo) | |
df_plot <- left_join(demo, inc, by = "location") %>% | |
filter(!variable.x == "Total population") %>% | |
left_join( | |
df$demo %>% | |
filter(variable == "Total population") %>% | |
select(location, "total_pop" = value), | |
by = "location" | |
) | |
ggplot(df_plot, aes(x = demo * 100, y = inc * 100, size = total_pop)) + | |
geom_point(alpha = 0.25, show.legend = FALSE) + | |
geom_smooth(aes(weight = total_pop), show.legend = FALSE) + | |
facet_wrap(~variable.y + variable.x, scales = "free") + | |
scale_size_area() + | |
theme_light() + | |
labs(x = "\npp change in demographics", y = "% change in median income\n", | |
title = "Income and demographics in U.S. metropolitan areas", | |
subtitle = "Changes between 1999/2000 and 2014", | |
caption = "Source: Pew Research (http://www.pewsocialtrends.org/)") |
Author
expersso
commented
Jun 7, 2016
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment