Created
May 28, 2018 16:40
-
-
Save dylanjm/c5e5e5f74dbadc8145116f7ee964e734 to your computer and use it in GitHub Desktop.
A script automating the analysis of Commercial Bank Expansion Feasibility
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(blscrapeR) | |
library(tidyverse) | |
library(albersusa) | |
library(ggalt) | |
library(wesanderson) | |
library(cowplot) | |
library(tidycensus) | |
# The original blscrapeR::qcew_api() is not really suited to take multiple years | |
# or niac codes at once. Here is a function that grabs it all! | |
get_industry_qcew <- function(years, niac_codes, qtr = "a"){ | |
# We want the years to repeat for each industry we're looking for | |
year_rep <- rep(years, length(niac_codes)) | |
# We also want each code to be repeated for each year of interest | |
niac_each <- rep(niac_codes, each = length(years)) | |
# Utilize purrr::map2() and purrr::possibly() to either grab our data | |
# or carry on when it hits an error | |
industry_dat <- map2(year_rep, niac_each, | |
possibly( | |
~ qcew_api(year = .x, qtr = qtr, | |
slice = "industry", sliceCode = .y), | |
otherwise = NULL | |
) | |
) %>% | |
compact() %>% # remove any NULL values from our list | |
map_df(bind_rows) # create one large data set | |
return(industry_dat) | |
} | |
qcew_dat <- get_industry_qcew(2013:2016, c(5221, 52213, 52231)) | |
qcew_dat_clean <- qcew_dat %>% | |
# Reorder columns and drop unwanted variables | |
select(area_fips, year, qtr, | |
industry_code, everything(), | |
-own_code, -agglvl_code, -size_code, -disclosure_code, | |
-lq_disclosure_code, -oty_disclosure_code) | |
idaho_dat <- qcew_dat_clean %>% | |
left_join(area_titles, by = "area_fips") %>% | |
separate(area_title, c("county", "state"), sep = ",") %>% | |
mutate_at(vars(state, industry_code), trimws) %>% | |
left_join(niacs, by = "industry_code") %>% | |
mutate_at(vars(industry_title), as.character) %>% | |
select(area_fips, state, county, | |
year, qtr, industry_code, industry_title, everything()) %>% | |
filter(state %in% "Idaho") | |
# This will give me all of the lat/long geometry's for each county in Idaho | |
us <- counties_composite() | |
us_map <- broom::tidy(us, region = "fips") %>% | |
filter(id %in% idaho_dat$area_fips) | |
com_banking <- idaho_dat %>% | |
filter(industry_code %in% "5221") | |
ggplot() + | |
geom_map(data = us_map, map = us_map, | |
aes(long, lat, map_id = id), | |
color = "black", fill = NA) + | |
geom_map(data = com_banking, map = us_map, | |
aes(fill = lq_annual_avg_estabs, map_id = area_fips)) + | |
scale_fill_gradientn(colors = wes_palettes$Zissou1) + | |
coord_map() + | |
facet_grid(.~ year) + | |
labs(title = glue::glue("{tools::toTitleCase(com_banking$industry_title)}"), | |
fill = "LQ", x = NULL, y = NULL) + | |
theme_minimal() + | |
theme(strip.text = element_text(face = "bold"), | |
axis.title = element_blank(), | |
axis.text = element_blank(), | |
legend.title = element_text(face = "bold"), | |
plot.title = element_text(family = "Helvetica Bold Oblique"), | |
plot.margin=grid::unit(c(0,0,0,0), "mm")) | |
# Not the most pretty function | |
# This is something I'm still learning on function development. | |
# My gut feeling is that I don't want to assume scope when writing a function | |
# So I don't want to call variables from my Global Environment. | |
# Therefore I'm going to give this function, everything I have create thus far. | |
plot_industry_map <- function(dat, map_dat, ind_code, fill_var, fill_pall = "Zissou1"){ | |
# We want to plot a specific industry at a time | |
ind_dat <- dat %>% | |
filter(industry_code %in% ind_code) | |
# Here is where we use tidyeval to enquo the fill variable. | |
fill_var_enc <- enquo(fill_var) | |
# Here is the plot in question | |
ggplot() + | |
geom_map(data = map_dat, map = map_dat, | |
aes(long, lat, map_id = id), | |
color = "black", fill = NA) + | |
geom_map(data = ind_dat, map = map_dat, | |
aes(fill = !!fill_var_enc, map_id = area_fips)) + | |
scale_fill_gradientn(colors = wes_palette(fill_pall)) + | |
coord_map() + | |
facet_grid(.~ year) + | |
labs(title = glue::glue("{tools::toTitleCase(ind_dat$industry_title)}"), | |
fill = "LQ") + | |
theme_minimal() + | |
theme(strip.text = element_text(face = "bold"), | |
axis.title = element_blank(), | |
axis.text = element_blank(), | |
legend.title = element_text(face = "bold"), | |
plot.title = element_text(family = "Helvetica Bold Oblique")) | |
} | |
# Iterate across all industries and make a heatmap plot | |
plts <- map(c("5221","52213","52231"), plot_industry_map, | |
dat = idaho_dat, | |
map_dat = us_map, | |
fill_var = lq_annual_avg_estabs | |
) | |
# Arrange all the plots into a grid | |
cowplot::plot_grid(plotlist = plts, ncol = 1) %>% | |
add_sub(glue::glue("Source: Bureau of Labor Statistics QCEW Database", | |
"\n*Data not available for years with non-filled counties"), | |
y = .5, hjust = -.8, size = 8, fontface = "italic") %>% | |
ggdraw() | |
# Same idea as above but we are going to look at the annual employment level percent change | |
plts2 <- map(c("5221","52213","52231"), plot_industry_map, | |
dat = idaho_dat, | |
map_dat = us_map, | |
fill_var = oty_annual_avg_emplvl_pct_chg/100, | |
fill_pall = "Moonrise3") | |
cowplot::plot_grid(plotlist = plts2, ncol = 1) %>% | |
add_sub("Source: Bureau of Labor Statistics QCEW Database\n*Data not available for years with non-filled counties", | |
y = .5, hjust = -.8, size = 8, fontface = "italic") %>% | |
ggdraw() | |
# Gathering data from FDIC website, getting rid of corrupted data point | |
idaho_banks <- read_csv(here::here("Data/ID_2017_Data.csv")) %>% | |
filter(SIMS_LONGITUDE < -1) | |
# Gathering data from US Census | |
idaho_median_income <- get_acs(geography = "county", | |
variables = c(mediancome = "B19013_001"), | |
state = "ID", geometry = TRUE) | |
# Heat map plot including bank locations and median income | |
ggplot() + | |
geom_sf(data = idaho_median_income, aes(fill = estimate)) + | |
geom_point(data = idaho_banks, | |
aes(y = SIMS_LATITUDE, x = SIMS_LONGITUDE), | |
color = "grey95", size = .8) + | |
coord_sf(datum = NA) + | |
scale_fill_gradientn(colors = wes_palettes$Rushmore1, | |
labels = scales::dollar) + | |
labs(title = "Median Income Across Counties in Idaho", | |
subtitle = "Each white dot represents a physical branch\nlocation of a bank registered by the FDIC, updated June 2017", | |
fill = "Estimate") + | |
theme(axis.title = element_blank(), | |
plot.subtitle = element_text(face = "italic")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment