library(tidyverse)
urls <- tibble(month = month.name, month_num = 1:12) %>%
cross_join(tibble(year = 2019:2023)) %>%
arrange(year) %>%
mutate(year_month = make_date(year = year, month = month_num)) %>%
filter(year_month < floor_date(today(), "months")) %>%
mutate(urls = paste0("https://www.seattle.gov/documents/Departments/SDCI/Resources/Stats/", year, month, "Summary", ".xlsx"))
url_to_units <- function(url, sheet = NA){
if(is.na(sheet)){
data <- openxlsx::read.xlsx(url, startRow = 6)
} else {
data <- openxlsx::read.xlsx(url, sheet = sheet, startRow = 6)
}
data %>%
as_tibble() %>%
janitor::clean_names() %>%
# filter(decision_type == "Grand Total") %>%
filter(decision_type == "New", dept_of_commerce != "Single_Family/Duplex") %>%
mutate(across(starts_with("units_"), ~ifelse(is.na(.x), 0, .x))) %>%
summarise(new_added = sum(units_added - units_removed)) %>%
pluck("new_added")
}
safe_url_to_units <- safely(url_to_units)
data <- urls %>%
mutate(new_units = map(urls, safe_url_to_units))
### Fix inconsistencies in sheets for a couple months
did_safely_error <- function(safely_named_list){
map(safely_named_list, "error") %>%
map_lgl(is.null) %>%
{!.}
}
errors <- data %>%
filter(did_safely_error(new_units))
data_sep2019 <- errors %>%
filter(month_num == 9, year == 2019) %>%
mutate(new_units = url_to_units("https://www.seattle.gov/documents/Departments/SDCI/Resources/Stats/2019SeptSummary.xlsx"))
data_feb2020 <- errors %>%
filter(month_num == 2, year == 2020) %>%
mutate(new_units = url_to_units(urls, sheet = "February Summary"))
###
data_clean <- data %>%
filter(!did_safely_error(new_units)) %>%
mutate(new_units = map_dbl(new_units, ~.x[[1]])) %>%
bind_rows(data_sep2019, data_feb2020) %>%
arrange(year, month_num)
data_clean %>%
mutate(year_quarter_label = tsibble::yearquarter(year_month),
year_quarter = ceiling_date(year_month, "quarters")) %>%
group_by(year_quarter, year_quarter_label) %>%
summarise(new_units = sum(new_units)) %>%
ggplot(aes(x = year_quarter, y = new_units))+
geom_line()+
theme_bw()
Created
July 26, 2023 20:16
-
-
Save brshallo/80401859d428a55967ce0d8bcfe16aee to your computer and use it in GitHub Desktop.
Before finding Seattle's API data source I'd at first pulled their permits issued data via their excel sheets. This was kind of hassle... See related gist here: https://gist.github.com/brshallo/7a14235134f8e10139f71c3369f8d50f
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment