Last active
December 16, 2021 09:09
-
-
Save StaffanBetner/0314e091aa3aef692f65dcfe0d0e369c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pacman::p_load(tidyverse, rio, magrittr, janitor) | |
#' Divide a time series into periods, with allowance for gaps
#'
#' Rows are sorted by `variable`, and a new period starts whenever the step
#' between two consecutive values is at least `threshold + 1`, i.e. when more
#' than `threshold` consecutive time points are missing. If `data` is grouped,
#' periods are numbered separately within each group.
#'
#' @param data Dataset (data frame or tibble, optionally grouped).
#' @param variable Unquoted column holding the time of measurement, e.g. year
#'   (duplicates allowed).
#' @param threshold Number of consecutive time points (e.g. years) allowed to
#'   be missing within a period.
#' @param filter_less_than `FALSE` (or 0) to keep every period, or a number N
#'   to drop periods with fewer than N distinct values of `variable` (not the
#'   length from min to max!). The remaining periods are then renumbered.
#' @return `data` sorted by `variable`, with two added columns: `period` (the
#'   running period number) and `n_year_period` (the number of distinct values
#'   of `variable` in the row's period). The grouping of `data` is preserved.
periods <- function(data,
                    variable,
                    threshold = 3,
                    filter_less_than = FALSE) {
  # Remember the caller's grouping so it can be restored after the temporary
  # grouping by `period` below.
  original_groups <- syms(group_vars(data))

  # Numbers the periods of a sorted vector. Defined as a closure so that
  # `threshold` always refers to the function argument and can never be
  # shadowed by a column of the same name in `data`. The leading 1 makes the
  # numbering start at 1.
  number_periods <- function(x) {
    cumsum(c(1, diff(x) >= threshold + 1))
  }

  out <- data %>%
    dplyr::arrange({{ variable }}) %>%
    mutate(period = number_periods({{ variable }})) %>%
    group_by(period, .add = TRUE) %>%
    mutate(n_year_period = n_distinct({{ variable }}))

  # Drop the temporary `period` grouping again.
  if (is_empty(original_groups)) {
    out <- ungroup(out)
  } else {
    out <- group_by(out, !!!original_groups)
  }

  # FALSE and 0 both mean "no filtering"; any other number is the minimum
  # count of distinct values a period must have to be kept.
  if (filter_less_than) {
    out <- out %>%
      # `.env$` forces the lookup of the argument rather than a data column.
      filter(n_year_period >= .env$filter_less_than) %>%
      # Renumber so the surviving periods are consecutive again.
      mutate(period = number_periods({{ variable }}))
  }

  out
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment