Skip to content

Instantly share code, notes, and snippets.

@brshallo
Created July 26, 2023 20:16
Show Gist options
  • Save brshallo/80401859d428a55967ce0d8bcfe16aee to your computer and use it in GitHub Desktop.
Save brshallo/80401859d428a55967ce0d8bcfe16aee to your computer and use it in GitHub Desktop.
Before finding Seattle's API data source I'd at first pulled their permits issued data via their excel sheets. This was kind of hassle... See related gist here: https://gist.github.com/brshallo/7a14235134f8e10139f71c3369f8d50f
library(tidyverse)

urls <- tibble(month = month.name, month_num = 1:12) %>% 
  cross_join(tibble(year = 2019:2023)) %>% 
  arrange(year) %>% 
  mutate(year_month = make_date(year = year, month = month_num)) %>% 
  filter(year_month < floor_date(today(), "months")) %>% 
  mutate(urls = paste0("https://www.seattle.gov/documents/Departments/SDCI/Resources/Stats/", year, month, "Summary", ".xlsx"))

url_to_units <- function(url, sheet = NA){
  
  if(is.na(sheet)){
    data <- openxlsx::read.xlsx(url, startRow = 6)
  } else {
    data <- openxlsx::read.xlsx(url, sheet = sheet, startRow = 6)
  }
  
  data %>% 
    as_tibble() %>% 
    janitor::clean_names() %>% 
    # filter(decision_type == "Grand Total") %>% 
    filter(decision_type == "New", dept_of_commerce != "Single_Family/Duplex") %>%
    mutate(across(starts_with("units_"), ~ifelse(is.na(.x), 0, .x))) %>% 
    summarise(new_added = sum(units_added - units_removed)) %>% 
    pluck("new_added")
}

safe_url_to_units <- safely(url_to_units)

data <- urls %>% 
  mutate(new_units = map(urls, safe_url_to_units))

### Fix inconsistencies in sheets for a couple months
did_safely_error <- function(safely_named_list){
  
  map(safely_named_list, "error") %>% 
    map_lgl(is.null) %>% 
    {!.}
}

errors <- data %>% 
  filter(did_safely_error(new_units))

data_sep2019 <- errors %>% 
  filter(month_num == 9, year == 2019) %>% 
  mutate(new_units = url_to_units("https://www.seattle.gov/documents/Departments/SDCI/Resources/Stats/2019SeptSummary.xlsx"))

data_feb2020 <- errors %>% 
  filter(month_num == 2, year == 2020) %>% 
  mutate(new_units = url_to_units(urls, sheet = "February Summary"))
### 

data_clean <- data %>% 
  filter(!did_safely_error(new_units)) %>% 
  mutate(new_units = map_dbl(new_units, ~.x[[1]])) %>% 
  bind_rows(data_sep2019, data_feb2020) %>% 
  arrange(year, month_num)

data_clean %>% 
  mutate(year_quarter_label = tsibble::yearquarter(year_month),
         year_quarter = ceiling_date(year_month, "quarters")) %>% 
  group_by(year_quarter, year_quarter_label) %>% 
  summarise(new_units = sum(new_units)) %>% 
  ggplot(aes(x = year_quarter, y = new_units))+
  geom_line()+
  theme_bw()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment