Last active
December 30, 2020 22:42
-
-
Save sdtaylor/d0b4e91be94bac8753dc6c6f81bd0ee6 to your computer and use it in GitHub Desktop.
Massage survival data to different dates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# Build a "date bin" data.frame with columns c('start','end').
#
# Arguments:
#   first_date, last_date  single Date values; first_date must be strictly
#                          earlier than last_date
#   time_bin_size          step handed to seq.Date() as `by`; a string like
#                          '1 year', '6 month', etc. see ?seq.Date for more
#                          examples
#
# Returns a data.frame with one row per bin, where each bin's `end` is the
# `start` of the following bin. The sequence is generated from first_date in
# steps of time_bin_size and never goes past last_date, so a trailing
# remainder shorter than one step gets no bin, and a range shorter than one
# full step gives a zero-row data.frame.
#
# Stops with an error when either argument is not a Date, is not a single
# non-missing value, or when first_date is not before last_date.
bin_dates <- function(first_date, last_date, time_bin_size = '1 year') {
  # `&&` is the scalar, short-circuiting operator meant for if() conditions;
  # inherits() is the base-R class test, so no extra package is required here.
  if (!(inherits(first_date, 'Date') && inherits(last_date, 'Date'))) {
    stop('first_date and last_date must be dates')
  }
  # Reject vectors and NA up front: the comparison below must yield exactly
  # one TRUE/FALSE for if() to work.
  if (length(first_date) != 1 || length(last_date) != 1 ||
      is.na(first_date) || is.na(last_date)) {
    stop('first_date and last_date must each be a single non-missing date')
  }
  if (first_date >= last_date) stop('first_date must come before last date')

  full_range <- seq.Date(first_date, last_date, by = time_bin_size)
  n_points <- length(full_range)

  # Consecutive break points become (start, end) pairs: drop the last point
  # for the starts and the first point for the ends.
  data.frame(start = full_range[-n_points], end = full_range[-1])
}
# Expand per-animal survival records into one row per id per time bin.
#
# Arguments:
#   df             data.frame with columns c('id','start','end','dead'). For
#                  each id, start and end are passed to bin_dates(); rows with
#                  dead == 1 identify the animals treated as dead.
#   time_bin_size  string like '1 year','6 month', etc. see ?seq.Date for more
#
# Returns an ungrouped table with columns c('id','start','end','dead') that
# holds, for each unique id, the bins produced by bin_dates(). dead is 1 only
# in the last bin of an id that had dead == 1 in df, and 0 in every other row.
#
# Stops with an error when df lacks any of the required columns.
stretch_survival_data <- function(df, time_bin_size) {
  required_cols <- c('id', 'start', 'end', 'dead')
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop('df is missing required column(s): ', paste(missing_cols, collapse = ', '))
  }

  dead_animals <- df %>%
    filter(dead == 1) %>%
    pull(id)

  # apply the appropriate binning
  # group_by() %>% reframe() submits each unique id to bin_dates() and stacks
  # the returned data.frames into one table. reframe() (dplyr >= 1.1.0) is the
  # supported verb for results with any number of rows per group; summarise()
  # is deprecated for that use. reframe() always returns an ungrouped result.
  stretched_df <- df %>%
    group_by(id) %>%
    reframe(
      bin_dates(first_date = start, last_date = end, time_bin_size = time_bin_size)
    )

  # mark dead animals as such (1) in their last respective timestep, otherwise
  # mark as alive (0)
  stretched_df %>%
    group_by(id) %>%
    mutate(dead = if_else(id %in% dead_animals & end == max(end), 1, 0)) %>%
    ungroup()
}
# Try it out on four example animals, two of which are flagged dead.
# start and end are typed as character in the table and converted to Date
# before use.
original <- tribble(
  ~id, ~start,       ~end,         ~dead,
  'A', '2012-01-01', '2014-01-01', 0,
  'B', '2012-01-01', '2014-01-01', 1,
  'C', '2005-01-01', '2014-01-01', 0,
  'D', '2015-01-01', '2018-08-01', 1
) %>%
  mutate(start = as.Date(start), end = as.Date(end))

# Same data at three bin sizes; each call is a separate top-level expression.
stretch_survival_data(original, '6 month')
stretch_survival_data(original, '1 month')
stretch_survival_data(original, '1 year')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment