Skip to content

Instantly share code, notes, and snippets.

@StaffanBetner
Last active December 16, 2021 09:09
Show Gist options
  • Save StaffanBetner/0314e091aa3aef692f65dcfe0d0e369c to your computer and use it in GitHub Desktop.
Save StaffanBetner/0314e091aa3aef692f65dcfe0d0e369c to your computer and use it in GitHub Desktop.
pacman::p_load(tidyverse, rio, magrittr, janitor)
# Divide a time series into periods, starting a new period at every large gap.
#
# Rows are sorted by `variable`. Within each existing group of `data`, the
# period counter starts at 1 and goes up by one every time the difference
# between consecutive values of `variable` is at least `threshold + 1`.
#
# data = dataset (data frame or tibble; grouping present on the input is
#        respected when numbering periods and is put back on the result)
# variable = unquoted column with the time of measurement, e.g. year
#            (duplicates allowed)
# threshold = largest difference between consecutive values that still keeps
#             them in the same period (a difference of threshold + 1 or more
#             starts a new period)
# filter_less_than = FALSE (or 0) keeps every period; a number N drops periods
#                    with fewer than N distinct values of `variable` (not the
#                    length from min to max!) and renumbers the remaining ones
#
# Returns the sorted data with two added columns: `period` (running period
# number) and `n_year_period` (number of distinct values of `variable` in the
# period the row was originally assigned to).
periods <- function(data,
                    variable,
                    threshold = 3,
                    filter_less_than = FALSE) {
  # Grouping present on the input; it is restored before returning.
  input_groups <- dplyr::group_vars(data)

  # Defined as a closure so that `threshold` always means the argument, even
  # when `data` has a column with the same name. The leading 1 opens the first
  # period; every TRUE after it (a gap above the threshold) opens another one.
  number_periods <- function(times) {
    cumsum(c(1, diff(times) >= threshold + 1))
  }

  out <- dplyr::arrange(data, {{ variable }})
  out <- dplyr::mutate(out, period = number_periods({{ variable }}))
  # Temporarily add `period` to the grouping to count values per period.
  out <- dplyr::group_by(out, period, .add = TRUE)
  out <- dplyr::mutate(out, n_year_period = dplyr::n_distinct({{ variable }}))

  # Drop the temporary `period` grouping again.
  if (length(input_groups) > 0) {
    out <- dplyr::group_by(out, !!!rlang::syms(input_groups))
  } else {
    out <- dplyr::ungroup(out)
  }

  if (filter_less_than) {
    # `!!` injects the argument's value so a same-named column cannot shadow it.
    out <- dplyr::filter(out, n_year_period >= !!filter_less_than)
    # Renumber so that the surviving periods are consecutive again.
    out <- dplyr::mutate(out, period = number_periods({{ variable }}))
  }

  out
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment