Last active
January 29, 2020 09:40
-
-
Save sbfnk/77ffae24c7a61d70e112e470e388027a to your computer and use it in GitHub Desktop.
code to estimate time-varying R0 for nCoV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library('tidyverse') | |
library('ggplot2') | |
##' Samples size (the number of trials) of a binomial distribution | |
##' copied from https://github.com/sbfnk/bpmodels/blob/master/R/utils.r | |
rbinom_size <- function(n, x, prob) { | |
x + stats::rnbinom(n, x + 1, prob) | |
} | |
## | |
## get data | |
## | |
data_dir <- | |
path.expand(file.path("~", "Dropbox", "nCoV-2019", "data_sources", | |
"case_data")) | |
china_onsets <- | |
readr::read_csv(file.path(data_dir, "time_series_China_CDC.csv")) | |
wuhan_confirmed <- | |
readr::read_csv(file.path(data_dir, "time_series_HKU_Wuhan.csv")) | |
linelist <- WuhanSeedingVsTransmission::get_linelist() | |
## distribution of delays over time | |
time_delays <- linelist %>% | |
filter(!is.na(delay_confirmation)) %>% | |
mutate(days_confirmation = as.integer(delay_confirmation)) %>% | |
group_by(date_confirmation) %>% | |
summarise(mean = mean(days_confirmation), | |
min.90 = quantile(days_confirmation, 0.05), | |
max.90 = quantile(days_confirmation, 0.95), | |
min.iqr = quantile(days_confirmation, 0.25), | |
max.iqr = quantile(days_confirmation, 0.75), | |
median = median(days_confirmation)) | |
## TODO: improve on arbitrary cutoff date | |
cutoff <- as.Date("2020-01-21") | |
## extract confirmation delays from linelist | |
delays <- linelist %>% | |
dplyr::filter(date_confirmation > cutoff, !is.na(delay_confirmation)) %>% | |
.$delay_confirmation %>% | |
as.integer() | |
p <- ggplot(time_delays, aes(x = date_confirmation, y = median)) + | |
geom_line() + | |
geom_ribbon(aes(ymin = min.iqr, ymax = max.iqr), alpha = 0.4) + | |
geom_ribbon(aes(ymin = min.90, ymax = max.90), alpha = 0.2) + | |
geom_vline(xintercept = cutoff + 1, linetype = "dashed") + | |
xlab("Date of confirmation") + ylab("Delay (in days)") + | |
cowplot::theme_cowplot() + | |
expand_limits(y = 0) | |
cowplot::save_plot("time_delays.pdf", p) | |
## | |
## get reporting delay distribution | |
## | |
## Report was on the 26th, data up to the 25th. | |
## We combine 0- and 1-day delays into one bin. | |
## | |
counts <- table(delays) | |
## wrap 0-delays into 1-delays | |
counts[2] <- counts[2] + counts[1] | |
## remove 0-delays | |
counts <- counts[-1] | |
## get cumulative sum | |
cum_counts <- cumsum(counts) | |
## get cumulative frequency | |
cum_freq <- cum_counts / max(cum_counts) | |
## remove last data point (as it's 1 anyway) | |
cum_freq <- cum_freq[-length(cum_freq)] | |
## | |
## augment data with reporting delays | |
## | |
report_date <- as.Date("2020-01-26") | |
## number of "augmentations" | |
naug <- 100 | |
## get confirmed cases | |
onsets <- china_onsets$Confirmed | |
## arbitrary starting point at day 31 -- too many zeroes before | |
## TODO: apply more general criterion for starting point or use all the data | |
onsets <- onsets[31:length(onsets)] | |
## sample onsets naug times | |
sampled_onsets <- lapply(seq_len(naug), function(x) { | |
## indices to sample, from the last day in the data backwards | |
indices <- length(onsets) + 1 - as.integer(names(cum_freq)) | |
## combine unsampled onsets (those that are longer ago than the longest | |
## reporting delay) and sampled onsets (using binomial sampler above) | |
x_onsets <- | |
c(onsets[seq_len(min(indices-1))], | |
rev(rbinom_size(length(cum_freq), onsets[indices], cum_freq))) | |
## get dates | |
## TODO: this dates thing is done too often, should be simplified | |
dates <- | |
china_onsets$t[seq(nrow(china_onsets) + 1 - length(x_onsets), | |
nrow(china_onsets))] | |
return(tibble(date = dates, cases = x_onsets)) | |
}) | |
## | |
## get time-varying reproduction number with three serial interval distributions | |
## | |
## TODO: replace triple copy-pasted code with loop | |
rep_R <- list() | |
rep_R[["SARS-like"]] <- lapply(sampled_onsets, function(x) { | |
## estimate R with (relatively short) 2-day smoothing window | |
R <- | |
EpiEstim::estimate_R(x$cases, | |
method = "parametric_si", | |
config = | |
EpiEstim::make_config( | |
list(mean_si = 8.4, std_si = 3.8, | |
t_start = 2:26, t_end = 3:27)))$R | |
## TODO: better way of filtering out NAs (this breaks if there are NAs in the | |
## middle of the dataset for whatever reason) | |
R <- R[!is.na(R$`Mean(R)`), ] | |
## get dates | |
## TODO: this dates thing is done too often, should be simplified | |
dates <- | |
china_onsets$t[seq(nrow(china_onsets) + 1 - nrow(R), | |
nrow(china_onsets))] | |
return(tibble(date = dates, R = rnorm(nrow(R), R$`Mean(R)`, R$`Std(R)`))) | |
}) | |
rep_R[["early SARS-like"]] <- lapply(sampled_onsets, function(x) { | |
## estimate R with (relatively short) 2-day smoothing window | |
R <- | |
EpiEstim::estimate_R(x$cases, | |
method = "parametric_si", | |
config = | |
EpiEstim::make_config( | |
list(mean_si = 10.0, std_si = 2.8, | |
t_start = 2:26, t_end = 3:27 )))$R | |
## TODO: better way of filtering out NAs (this breaks if there are NAs in the | |
## middle of the dataset for whatever reason) | |
R <- R[!is.na(R$`Mean(R)`), ] | |
## get dates | |
## TODO: this dates thing is done too often, should be simplified | |
dates <- | |
china_onsets$t[seq(nrow(china_onsets) + 1 - nrow(R), | |
nrow(china_onsets))] | |
return(tibble(date = dates, R = rnorm(nrow(R), R$`Mean(R)`, R$`Std(R)`))) | |
}) | |
rep_R[["MERS-like"]] <- lapply(sampled_onsets, function(x) { | |
## estimate R with (relatively short) 2-day smoothing window | |
R <- | |
EpiEstim::estimate_R(x$cases, | |
method = "parametric_si", | |
config = | |
EpiEstim::make_config( | |
list(mean_si = 6.8, std_si = 4.1, | |
t_start = 2:26, t_end = 3:27 )))$R | |
## TODO: better way of filtering out NAs (this breaks if there are NAs in the | |
## middle of the dataset for whatever reason) | |
R <- R[!is.na(R$`Mean(R)`), ] | |
## get dates | |
## TODO: this dates thing is done too often, should be simplified | |
dates <- | |
china_onsets$t[seq(nrow(china_onsets) + 1 - nrow(R), | |
nrow(china_onsets))] | |
return(tibble(date = dates, R = rnorm(nrow(R), R$`Mean(R)`, R$`Std(R)`))) | |
}) | |
## summarise sampled onsets | |
cases <- bind_rows(sampled_onsets) %>% | |
group_by(date) %>% | |
summarise(min.90 = quantile(cases, 0.05), | |
max.90 = quantile(cases, 0.95), | |
min.iqr = quantile(cases, 0.25), | |
max.iqr = quantile(cases, 0.75), | |
median = median(cases), | |
mean = mean(cases)) | |
## summarise sampled reproduction numbers | |
R <- lapply(names(rep_R), function(x) { | |
bind_rows(rep_R[[x]]) %>% | |
mutate(`Serial interval distribution` = x) | |
}) %>% | |
bind_rows() %>% | |
group_by(date, `Serial interval distribution`) %>% | |
summarise(min.90 = quantile(R, 0.05), | |
max.90 = quantile(R, 0.95), | |
min.iqr = quantile(R, 0.25), | |
max.iqr = quantile(R, 0.75), | |
median = median(R), | |
mean = mean(R)) | |
## plot | |
p <- ggplot(R, aes(x = date, y = mean, color = `Serial interval distribution`, | |
fill = `Serial interval distribution`)) + | |
geom_line() + | |
geom_ribbon(aes(ymin = min.iqr, ymax = max.iqr), alpha = 0.4) + | |
geom_ribbon(aes(ymin = min.90, ymax = max.90), alpha = 0.2) + | |
scale_color_brewer(palette = "Dark2") + | |
scale_fill_brewer(palette = "Dark2") + | |
xlab("") + ylab("R") + | |
cowplot::theme_cowplot() + | |
expand_limits(y = 0) | |
cowplot::save_plot("time_R.pdf", p) | |
## plot | |
p <- ggplot(cases, aes(x = date, y = mean)) + | |
geom_line() + | |
geom_line(data = china_onsets, aes(x = t, y = Confirmed), color = "red") + | |
geom_ribbon(aes(ymin = min.iqr, ymax = max.iqr), alpha = 0.4) + | |
geom_ribbon(aes(ymin = min.90, ymax = max.90), alpha = 0.2) + | |
xlab("") + ylab("Cases") + | |
cowplot::theme_cowplot() + | |
expand_limits(y = 0) | |
cowplot::save_plot("time_cases.pdf", p) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment