Skip to content

Instantly share code, notes, and snippets.

@brshallo
Created June 12, 2023 16:30
Show Gist options
  • Save brshallo/4ffe9272a37044571915f66860b54329 to your computer and use it in GitHub Desktop.
Save brshallo/4ffe9272a37044571915f66860b54329 to your computer and use it in GitHub Desktop.
Slight update to https://gist.github.com/brshallo/4093106372afefdda5c2e223fc53a3fc but with an additional condition
# This example only includes a value in the rolling mean() if the close date on
# the historical dates comes after the snapshot date for row of interest
### CREATE SAMPLE DATA
library(tidyverse)
library(slider)
library(lubridate)
sample_size <- 5000
obs_per_day <- 100
day_steps <- seq(from = 1, by = 7, length.out = sample_size / obs_per_day) %>%
map(rep, obs_per_day) %>%
unlist()
set.seed(12)
data <- tibble(
group = sample(LETTERS[1:4], sample_size, TRUE),
# id = sample(LETTERS[1:10], sample_size, TRUE),
snapshot_date = lubridate::ymd(20220101) + days(day_steps),
close_date = snapshot_date + days(sample(1:120, sample_size, TRUE)),
projected_close = close_date + days(sample(-30:30, sample_size, TRUE)),
win = ifelse(projected_close < close_date, 1, 0)
) %>%
arrange(snapshot_date, close_date) %>%
filter(projected_close > snapshot_date) %>%
group_by(group, close_date) %>%
#mutate(count = row_number())
# removing any obs that have closed date after final snapshot date...
#mutate(win = ifelse(close_date >= max(snapshot_date), NA, win)) %>%
filter(!is.na(win), group == "A")
## EXAMPLE
## Include historical observations in the rolling average if the closed date comes after the row's snap date
output <- data %>%
group_by(group) %>%
mutate(
row = row_number(),
# ctrl + f "Accessing the current index value" here for approach:
# https://slider.r-lib.org/reference/slide_index.html
w30_prep = slider::pslide_index(
.l = list(win, close_date, projected_close),
.i = snapshot_date,
.f = list,
#I set this to 900, ~ 3 years, to ensure this is cumulative over the entire data to compare outputs
.before = 900,
# below is negative so doesn't include current date of values
.after = -1
),
win30 = map2_dbl(.x = w30_prep, .y = snapshot_date,
.f = ~mean(.x[[1]] * ifelse( (.x[[2]] <= .y) | (.x[[3]] <= .y), 1, NA), na.rm = TRUE))
) %>% filter(!is.nan(win30)) %>%
distinct(group, snapshot_date, .keep_all = T)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment