Last active
February 20, 2019 14:06
-
-
Save tmasjc/8de69d5ebbc7f71b6727627fe6a10ba0 to your computer and use it in GitHub Desktop.
Calculate sum for a rolling time window in R #rstats #zoo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generate dummy data ----------------------------------------------------- | |
library(dplyr) | |
library(ggplot2) | |
library(lubridate) | |
start_day <- as.Date("2018-01-01") | |
# 2 Columns - Date & n | |
dat <- data.frame(date = start_day + days(1:90), n = round(runif(90, 1, 10))) | |
# Limited to one specific data frame -------------------------------------- | |
# A 30-day time window | |
get_window_sum <- function(end_date){ | |
# calculate end date | |
start_date <- end_date - days(30) | |
# filter start and end date | |
df <- dat %>% filter(date >= start_date & date <= end_date) | |
# sum | |
sum(df[['n']], na.rm = TRUE) | |
} | |
# Apply on data frame | |
method_one <- dat %>% rowwise() %>% mutate(window_sum = get_window_sum(date)) | |
# Quick plot | |
method_one %>% | |
ggplot(aes(date, window_sum)) + geom_line() | |
# Using ‘Zoo' package ----------------------------------------------------- | |
library(zoo) | |
# Convert to zoo object | |
z <- zoo(dat$n, dat$date) | |
# Use rollapply to apply FUN over a rolling margin | |
method_two <- rollapply(z, list(-(30:0)), sum, na.rm = TRUE) | |
# Verify with method one | |
sum(method_two %>% as.data.frame() == (method_one %>% filter(date >= "2018-02-01"))$window_sum) | |
# What if we have multiple groups? --------------------------------------- | |
# Simulate data | |
dat2 <- sample_n(dat, size = 300, replace = TRUE) %>% | |
rowwise() %>% | |
mutate(n = n + runif(1, 1, 3) %>% round()) %>% | |
# Assign random groups | |
cbind(grp = c("R", "G", "B")[runif(300, 1, 4)]) %>% | |
# Remove duplicated | |
group_by(date, grp) %>% | |
summarise(n = max(n)) %>% | |
arrange(date) %>% ungroup() | |
# Fill missing data for certain dates | |
dat2 <- tidyr::complete(dat2, date, grp) | |
# Split into smaller data frames based on grp | |
by_grp <- split(dat2, as.factor(dat2$grp)) | |
# Replicate method two into list of data frames | |
method_three <- by_grp %>% | |
purrr::map(.f = ~zoo(.$n, .$date)) %>% | |
purrr::map(rollapply, list(-(30:0)), sum, na.rm = TRUE) | |
# Quick plot | |
method_three %>% | |
as.data.frame() %>% | |
rownames_to_column(var = "date") %>% | |
mutate(date = as.Date(date)) %>% | |
tidyr::gather(grp, n, -date) %>% | |
ggplot(aes(date, n, col = grp, group = grp)) + | |
geom_line() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment