Skip to content

Instantly share code, notes, and snippets.

@walkerke
Last active August 24, 2016 19:33
Show Gist options
  • Save walkerke/5a7b1eb3d3f9f1ae8adfa7c3a7832208 to your computer and use it in GitHub Desktop.
Save walkerke/5a7b1eb3d3f9f1ae8adfa7c3a7832208 to your computer and use it in GitHub Desktop.
library(ggplot2)
library(animation)
library(readxl)
library(tidyr)
library(dplyr)
library(stringr)
library(magrittr)
# Data source: Jonathan Schroeder at http://conservancy.umn.edu/handle/11299/181605
# Load the wide table of historical populations; the population columns run
# from `epop1790` through `pop2010`.
df <- read_excel('cbsa2013_hist_pops.xlsx')

# Reshape to long form: one row per original row per population column.
# The year is taken from the last four characters of the gathered column name.
dft <- df %>%
  gather(key = year, value = population, epop1790:pop2010) %>%
  mutate(year = as.numeric(str_sub(year, start = -4)))

# Derive a short display name from CBSA_NAME by keeping only the text before
# the first ",", then before the first "-", then before the first "/"
# (applied in that order).
dft$name_short <- Reduce(
  function(name, delimiter) str_split_fixed(name, delimiter, n = 2)[, 1],
  c(",", "-", "/"),
  dft$CBSA_NAME
)
# Rank areas within each year by population (rank 1 = largest) and keep the
# rows ranked 20 or better. min_rank() assigns tied values the same rank, so a
# tie at the cutoff can keep more than 20 rows for a year; rows whose rank is
# NA are dropped by filter().
top20 <- dft %>%
  group_by(year) %>%
  mutate(rank = min_rank(desc(population))) %>%
  filter(rank <= 20) %>%
  ungroup()
# Region membership, keyed by the shortened metro name (`name_short`).
# Names that appear in none of these vectors receive no region downstream.
northeast <- c(
  "Albany", "Boston", "Bridgeport", "Claremont", "Hartford", "New York",
  "Philadelphia", "Pittsburgh", "Portland", "Providence", "Springfield",
  "Torrington", "Worcester", "Rochester", "Syracuse", "Buffalo", "Scranton"
)

midwest <- c(
  "Cincinnati", "Columbus", "Chicago", "St. Louis", "Indianapolis",
  "Detroit", "Kansas City", "Cleveland", "Minneapolis", "Milwaukee"
)

south <- c(
  "Baltimore", "Charleston", "Charlotte", "Richmond", "Salisbury",
  "Virginia Beach", "Washington", "Charlottesville", "Lexington",
  "Nashville", "Louisville", "Atlanta", "New Orleans", "Dallas", "Houston",
  "Miami", "Tampa"
)

west <- c(
  "San Francisco", "Los Angeles", "Seattle", "San Diego", "Phoenix",
  "Riverside"
)
# Add the plotting columns to `top20` (updated in place via %<>%):
#   region         - region name looked up from the shortened metro name;
#                    NA when the name is in none of the region vectors
#   poplabel       - population formatted as thousands ("k") below one
#                    million, otherwise as millions ("m") with two decimals
#   position_label - offset added to the population to place the text label
#   name_short     - left-padded to at least 15 characters
top20 %<>%
  mutate(
    # Must be computed before name_short is padded below, because the region
    # vectors hold the unpadded names.
    region = ifelse(
      name_short %in% northeast, "Northeast",
      ifelse(
        name_short %in% midwest, "Midwest",
        ifelse(
          name_short %in% south, "South",
          # Typed NA keeps the column character even if nothing matches.
          ifelse(name_short %in% west, "West", NA_character_)
        )
      )
    ),
    poplabel = ifelse(
      population < 1000000,
      paste0(round(population / 1000, 0), "k"),
      paste0(round(population / 1000000, 2), "m")
    ),
    # NOTE(review): this threshold is 100,000 while poplabel switches units at
    # 1,000,000 -- confirm the difference is intentional.
    position_label = ifelse(population < 100000, 1000000, 1200000),
    # Refer to the column through the data mask rather than `top20$name_short`,
    # so the result does not depend on the global object's current state.
    name_short = str_pad(name_short, width = 15, side = "left")
  )
# Render one horizontal bar chart per decade (1790-2010) and let saveGIF()
# assemble the printed frames into "metro_pop.gif".
saveGIF({
  for (i in seq(1790, 2010, 10)) {
    # Rows for the current year only.
    yearly <- filter(top20, year == i)

    # Bars are ordered by population; coord_flip() below turns them horizontal.
    # The former `frame = year` mapping was dropped: it is not a ggplot2
    # aesthetic, so it had no effect beyond a warning on every frame.
    g <- ggplot(yearly, aes(x = reorder(name_short, population))) +
      geom_col(aes(y = population, fill = region)) +
      # Text label sits past the end of each bar by `position_label`.
      geom_text(aes(y = population + position_label, label = poplabel),
                fontface = "bold") +
      coord_flip() +
      # Fixed limits keep the population scale identical across frames.
      scale_y_continuous(limits = c(0, 25000000), expand = c(0, 0)) +
      scale_fill_manual(values = c("Northeast" = "#e41a1c",
                                   "Midwest" = "#377eb8",
                                   "South" = "#4daf4a",
                                   "West" = "#984ea3")) +
      theme_minimal(base_size = 16, base_family = "Tahoma") +
      theme(panel.grid.major.x = element_blank(),
            panel.grid.minor.x = element_blank(),
            panel.grid.major.y = element_blank(),
            panel.grid.minor.y = element_blank(),
            plot.title = element_text(face = "bold"),
            axis.text.x = element_blank(),
            legend.position = "bottom") +
      labs(y = "",
           x = "",
           fill = "",
           caption = "Data source: Jonathan Schroeder, University of Minnesota | Chart by @kyle_e_walker",
           title = paste0("20 largest US metro areas by population, ", as.character(i)))

    # Inside a loop the plot must be printed explicitly to be drawn.
    print(g)
  }
}, movie.name = "metro_pop.gif", interval = 0.8, ani.width = 700, ani.height = 600)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment