Skip to content

Instantly share code, notes, and snippets.

@jepusto
Created August 24, 2015 02:17
Show Gist options
  • Save jepusto/2825b7e2a2b49ff2e61a to your computer and use it in GitHub Desktop.
Save jepusto/2825b7e2a2b49ff2e61a to your computer and use it in GitHub Desktop.
library(tidyr)
library(dplyr)
library(stringr)
library(ggplot2)
cities_select <- c("HOUSTON","SAN ANTONIO","DALLAS","AUSTIN","FORT WORTH","EL PASO")
#----------------------------------------
# get population estimates
#----------------------------------------
# get 2000-2010
pop_ests_2000 <- read.csv("http://www.census.gov/popest/data/intercensal/cities/files/SUB-EST00INT.csv",
stringsAsFactors = FALSE)
filter(pop_ests_2000, STNAME=="Texas" & SUMLEV==162 & toupper(NAME) %in% paste(cities_select, "CITY")) %>%
select(NAME, POPESTIMATE2000:POPESTIMATE2009, POPESTIMATE2010) %>%
gather("Year","Pop", POPESTIMATE2000:POPESTIMATE2010) %>%
mutate(City = toupper(str_sub(NAME, end = -6L)),
Year = as.integer(str_sub(Year, start = -4L)),
source = "2000-2010") %>%
select(-NAME) ->
pop_by_city_2000
# get 2010-2014
pop_ests_2010 <- read.csv("http://www.census.gov/popest/data/cities/totals/2014/files/SUB-EST2014_48.csv",
stringsAsFactors = FALSE)
filter(pop_ests_2010, SUMLEV==162 & toupper(NAME) %in% paste(cities_select, "CITY")) %>%
select(NAME, POPESTIMATE2010:POPESTIMATE2014) %>%
gather("Year","Pop", POPESTIMATE2010:POPESTIMATE2014) %>%
mutate(City = toupper(str_sub(NAME, end = -6L)),
Year = as.integer(str_sub(Year, start = -4L)),
source = "2010-2014") %>%
select(-NAME) ->
pop_by_city_2010
# adjust so 2010 agrees across sources
filter(pop_by_city_2000, Year==2010) %>%
select(City, Pop_old = Pop) %>%
left_join(select(filter(pop_by_city_2010, Year==2010), City, Pop), by = "City") %>%
mutate(ratio = Pop / Pop_old) %>%
select(City, ratio) ->
pop_adj_ratio
left_join(pop_by_city_2000, pop_adj_ratio, by = "City") %>%
mutate(Pop = Pop * ratio * max((Year - 2005) / 5, 0)) %>%
filter(2006 <= Year, Year <= 2009) %>%
select(-ratio) ->
pop_by_city_2000_adj
pop_by_city <- rbind(pop_by_city_2000_adj, pop_by_city_2010)
# add projection for 2015
lm_pop <- lm(Pop ~ 0 + City + Year:City, data = pop_by_city_2010)
pop2015 <- filter(pop_by_city, Year==2014) %>% mutate(Year = 2015)
pop2015$Pop <- predict(lm_pop, newdata = pop2015)
pop_by_city <- rbind(pop_by_city, pop2015)
ggplot(pop_by_city, aes(Year, Pop, color = City, shape = City, linetype = source)) +
geom_point() + geom_line() +
theme_minimal()
#---------------------------------------
# prepare crash data
#---------------------------------------
crash_dat <- read.csv("http://blogs.edb.utexas.edu/pusto/files/2015/08/Yearly_crash_data_by_city.csv",
stringsAsFactors=FALSE)
select(crash_dat, City, Year, Fatal_crashes:Incapacitating_injuries, Total_crashes) %>%
filter(City %in% cities_select) %>% droplevels() %>%
mutate(Fatality_rate = Fatal_crashes / Total_crashes) %>%
gather("quantity","n", Fatal_crashes:Total_crashes, Fatality_rate) %>%
filter(!is.na(n)) %>%
left_join(pop_by_city, by = c("City","Year")) %>%
mutate(partial = (Year==2015),
quantity = str_replace(quantity,"_"," "),
n_per = n / Pop * 100000,
projection = "actual") ->
crashes
#-------------------------------
# Crashes per 100K residents
#-------------------------------
filter(crashes, Year==2015) %>% mutate(n = 0, n_per = 0) ->
crashes_axis
filter(crashes, Year==2015) %>%
mutate(n = n * 12 / 7, n_per = n_per * 12/ 7, projection = "projected") ->
crashes_projected
crashes <- rbind(crashes, crashes_projected)
filter(crashes, quantity != "Fatality rate",
Year < 2015 | projection == "projected") %>% droplevels() ->
crashes_total
crash_labels <- filter(crashes_total, Year==2015)
crashes_per <-
ggplot(crashes_total, aes(Year, n_per, color = City, shape = City)) +
geom_line() + geom_point() +
geom_text(data = crash_labels, aes(x = 2015, y = n_per, label = City), hjust=-0.1, size = 3) +
geom_blank(data = filter(crashes_axis, quantity!="Fatality rate")) +
scale_color_brewer(type = "qual", palette=7) +
scale_x_continuous(limits = c(2006, 2016),
breaks = 2006:2015, labels = c(2006:2014, "2015 \n (Projected)")) +
facet_wrap(~ quantity, ncol = 1, scales = "free") +
theme_minimal() + theme(legend.position="none") +
labs(color = "", shape = "", y = "Crashes/Fatalities per 100,000 residents",
title = "Annual crashes and fatalities/injuries per 100,000 residents \n in selected Texas cities (2006-2015)")
arrange(crashes_total, quantity, City, Year) %>%
group_by(quantity, City) %>%
mutate(change = n_per - lag(n_per)) %>%
filter(!is.na(change) & quantity %in% c("Fatal crashes","Fatalities")) ->
crashes_change
summary(filter(crashes_change, quantity=="Fatal crashes")$change)
summary(filter(crashes_change, quantity=="Fatalities")$change)
ggplot(crashes_change, aes(change, color = quantity)) + geom_density()
#-------------------------------
# Total crashes
#-------------------------------
crashes_total_fig <-
ggplot(crashes_total, aes(Year, n, color = City, shape = City)) +
geom_line() + geom_point() +
geom_text(data = crash_labels, aes(x = 2015, y = n, label = City), hjust=-0.1, size = 3) +
geom_blank(data = filter(crashes_axis, quantity!="Fatality rate")) +
scale_color_brewer(type = "qual", palette=7) +
scale_x_continuous(limits = c(2006, 2016),
breaks = 2006:2015, labels = c(2006:2014, "2015 \n (Projected)")) +
facet_wrap(~ quantity, ncol = 1, scales = "free") +
theme_minimal() + theme(legend.position="none") +
labs(color = "", shape = "", y = "Total Crashes/Fatalities",
title = "Annual total crashes and fatalities/injuries \n in selected Texas cities (2006-2015)")
#-------------------------------
# Fatality rate figure
#-------------------------------
fatality_rate <- filter(crashes, quantity=="Fatality rate" & projection=="actual")
crash_labels <- filter(fatality_rate, Year==2015)
fatality_rates <-
ggplot(fatality_rate, aes(Year, n, color = City, shape = City)) +
geom_line() + geom_point() +
geom_blank(data = filter(crashes_axis, quantity=="Fatality rate")) +
geom_text(data = crash_labels, aes(x = 2015, y = n, label = City), hjust=-0.1, size = 3) +
scale_color_brewer(type = "qual", palette=7) +
scale_x_continuous(limits = c(2006, 2016), breaks = 2006:2015,
labels = c(2006:2014, "2015 \n (Through 7/31)")) +
theme_minimal() + theme(legend.position="none") +
labs(color = "", shape = "", y = "Fatal crashes / Total crashes",
title = "Proportion of crashes that are fatal \n in selected Texas cities (2010-2015)")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment