Skip to content

Instantly share code, notes, and snippets.

@brshallo
brshallo / row-stuff.md
Created October 20, 2021 23:59
Question answered for R4DS mentor hours
library(tidyverse)

tibble(id = c(3:7, NA, NA, 10:12, NA, NA)) %>% 
  mutate(NA_flag = is.na(id),
         NA_ID = cumsum(!NA_flag & lead(NA_flag))) %>% 
  group_by(NA_ID) %>% 
  mutate(NA_count = cumsum(NA_flag) * NA_flag,
         id_new = ifelse(!NA_flag, NA, min(id, na.rm = TRUE) + NA_count),
         id_final = ifelse(NA_flag, id_new, id))
@brshallo
brshallo / modeltime-cross-sectional-pred-intervals.md
Last active October 20, 2021 05:48
"off-label" use of {modeltime} funs to produce pred intervals on cross-sectional problem
library(tidyverse)
library(lubridate)
library(tidymodels)
library(modeltime)

set.seed(1234)
iris2 <- iris %>% 
  as_tibble() %>% 
  mutate(date = ymd(19800101) + days(1:n()))
@brshallo
brshallo / slide-multiple-double.md
Last active October 13, 2021 23:51
More flexible version than slide_multiple() shown here: https://gist.github.com/brshallo/e193b743f86ae62b5134707ec016fdf5
library(tidyverse)
library(slider)

slide_dbl_multiple <- function(.x, .f, ..., .before = 0L, .after = 0L, .step = 1L, .complete = FALSE, suffix = "") {
  x <- .x
  f <- .f
  f_name <- deparse(substitute(.f))
  
  map2(
library(tidyverse)
library(slider)

data <- tibble(x = 1:30)

## MAKING lots of time based features systematically with {slider}

lag_multiple <- function(x, n_vec){
  map(n_vec, lag, x = x) %>% 
if (!requireNamespace("piececor")) devtools::install_github('brshallo/piececor')
library(dplyr)

set.seed(1234)
x <- seq(.01, 3*pi, pi/100)
y <- sin(x) + rnorm(length(x), sd = 0.25)
xy <- tibble::tibble(x = x, y = y)
xy_dist <- infotheo::discretize(xy)
@brshallo
brshallo / standardizing-lengths.md
Created October 6, 2021 22:32
example for R4DS answer
a <- list(c("20M1", "A1", "ACC1"), c("20M2", "A2", "ACC2"), c("20M3"))

mx <- max(lengths(a))

data.frame(lapply(a, `length<-`, mx))
#>   c..20M1....A1....ACC1.. c..20M2....A2....ACC2.. c..20M3...NA..NA.
#> 1                    20M1                    20M2              20M3
#> 2                      A1                      A2              <NA>
#&gt; 3 ACC1 ACC2 
@brshallo
brshallo / dplyover-to-dplyr-sql.md
Last active October 6, 2021 05:21
Convert dplyover README examples to dplyr and SQL code
## dplyover examples in dplyr and sql
## I use dplyr::across() somewhat unnecessarily but is to show that these translate over fine

library(dbplyr)
library(dplyr, warn.conflicts = FALSE)

con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")

aaa <- tibble(a = 1:25)
library(tidyverse)
library(tidymodels)

devtools::source_gist("https://gist.github.com/brshallo/3db2cd25172899f91b196a90d5980690")
#> i Sourcing https://gist.githubusercontent.com/brshallo/3db2cd25172899f91b196a90d5980690/raw/5d6731b63fd75e09e7a5e1e33134c389f6209652/predict-interval.R
#> i SHA-1 hash of file is 3b41b16d53af745f880d21b4056fe18412b347e6

data <- tibble(x = abs(rnorm(1000)), 
               y = x + rnorm(1000, sd = 0.7) * x / (max(x))
@brshallo
brshallo / piecewise-spearman-corelation.md
Last active June 7, 2021 20:46
Mockup piecewise weighted Spearman Correlation based on derivatives of GAM model... per conversations with Shaina and Ricky
library(tidyverse)
library(mgcv)
library(gratia)

n_obs <- 10000

set.seed(12345)
new_dat <- tibble(x = rnorm(n_obs, sd = 2*pi),
                  y = sin(x) + rnorm(n_obs, sd = 0.5))
@brshallo
brshallo / continuous-engagements.md
Created June 3, 2021 21:24
Identify longest continuous engagement by customer

library(tidyverse)
library(lubridate)

T <- tibble(
  customer = c("c1", "c2", "c1", "c2", "c2", "c2"),
  start_date = ymd(c(
    20210107, 20210109, 20210201, 20210225, 20210314, 20210401