Skip to content

Instantly share code, notes, and snippets.

@brshallo
brshallo / row-nums-union.md
Last active October 11, 2024 18:16
Example of how doing UNION ALL can return fewer rows

SQL Example: Summarizing Data with UNION ALL vs. GROUP BY

Data Setup

We create a table with products and segments:

-- Create the sample table
CREATE TABLE sales_data (
    product VARCHAR(10),
    segment VARCHAR(10),
@brshallo
brshallo / risk-ratios-sim.md
Created March 26, 2024 02:34
simulating distribution of bootstrapped risk ratios https://x.com/nntaleb/status/1770385630163312902?s=20
library(tidyverse)

sim_risk_ratios <- function(x){
  events <- map2(rep(c(TRUE, FALSE), 5), c(31, 414 - 31, 82, 1492 - 82, 252, 4832 - 252, 423, 11831 - 423, 52, 1509-52), rep) %>% unlist()
  outcomes <- tibble(
    group = map2(c("<8 h", "8<10 h", "10<12 h", "12-16 h", ">16 h"), c(414, 1492, 4832, 11831, 1509), rep) %>% unlist()) %>% 
    mutate(event_sim = sample(events, n(), replace = TRUE)) %>% 
    group_by(group) %>% 
 summarise(risk = mean(event_sim))
library(magrittr)
find_in_files <- function(path, pattern){
path %>%
fs::dir_ls(recurse = TRUE, type = "file", regexp = "(\\.[rR])$") %>%
purrr::map(~grep(pattern, readLines(.x, warn = FALSE), value = TRUE)) %>%
purrr::keep(~length(.x) > 0)
}
@brshallo
brshallo / permits-issued.md
Created July 26, 2023 20:16
Before finding Seattle's API data source I'd at first pulled their permits issued data via their excel sheets. This was kind of hassle... See related gist here: https://gist.github.com/brshallo/7a14235134f8e10139f71c3369f8d50f
library(tidyverse)

urls <- tibble(month = month.name, month_num = 1:12) %>% 
  cross_join(tibble(year = 2019:2023)) %>% 
  arrange(year) %>% 
  mutate(year_month = make_date(year = year, month = month_num)) %>% 
  filter(year_month < floor_date(today(), "months")) %>% 
  mutate(urls = paste0("https://www.seattle.gov/documents/Departments/SDCI/Resources/Stats/", year, month, "Summary", ".xlsx"))
@brshallo
brshallo / seattle-units-added-new-permits.md
Created July 26, 2023 19:56
Housing units added, new permits
library(tidyverse)
library(httr)
library(jsonlite)

# downloaded data from: https://data.seattle.gov/Permitting/Building-Permits/76t5-zqzr
data_permits <- read_csv("Building_Permits.csv")

data_permits %>% 
 filter(PermitTypeDesc == "New") %&gt;% 
# This example only includes a value in the rolling mean() if the close date on
# the historical dates comes after the snapshot date for row of interest
### CREATE SAMPLE DATA
library(tidyverse)
library(slider)
library(lubridate)
sample_size <- 5000
@brshallo
brshallo / rolling-mean-conditioned-on-iteration-date.R
Created June 1, 2023 06:15
Example of calculating a rolling mean but conditioning that upon each observations date being less than the date in the index for the row.
# This example only includes a value in the rolling mean() if the close date on
# the historical dates comes after the snapshot date for row of interest
### CREATE SAMPLE DATA
library(tidyverse)
library(slider)
sample_size <- 5000
obs_per_day <- 100
@brshallo
brshallo / lag-multiple-across.md
Last active December 6, 2022 18:04
is a riff on https://gist.github.com/brshallo/cadaa40cef6387e28924b6c3756627c9 but allows for use across multiple columns. Could make minor tweak to allow arbitrary function inputs, could also just use dplyover.
library(tidyverse)

lag_multiple <- function(x, n_vec){
  map(n_vec, lag, x = x) %>% 
    set_names(paste0("lag", n_vec)) %>% 
    as_tibble()
}

df <- tibble(a = 1:10, b = 11:20, c= 21:30)
library(dplyr)
library(ggplot2)
library(modeldata)

data(ames)

set.seed(123)
ames %>% 
  sample_n(200) %>%
@brshallo
brshallo / convert-currencies.R
Last active September 7, 2022 19:10
Improved and functionalized version of code pulled mostly from post on pricing -- can see this gist sourced in Appendix of post: https://www.bryanshalloway.com/2022/06/16/converting-between-currencies-using-pricer/#
library(priceR)
library(dplyr)
library(tidyr)
library(purrr)
library(lubridate)
# Create rates lookup table
pminmax <- function(x, y) {
paste(pmin.int(x, y), pmax.int(x, y), sep = ".")