Created
April 8, 2025 09:15
-
-
Save adamkucharski/8444fd8cfb3ddb0b01137a3ba5388941 to your computer and use it in GitHub Desktop.
CFR estimation with diamond_princess data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Correcting for real-time CFR bias ------------------------------------------------- | |
# This script builds on the theory outlined in this vignette: | |
# https://epiverse-trace.github.io/cfr/articles/estimate_from_individual_data.html | |
# Load packages | |
library(cfr) | |
library(dplyr) | |
library(ggplot2) | |
library(gridExtra) | |
setwd("~/Dropbox/LSHTM/TRACE_project/2025_02_workshop/") | |
# Load data and parameters -------------------------------------------- | |
# Load Diamond Princess Data | |
covid_data <- read.csv("diamond_princess.csv") | |
covid_data$date <- as.Date(covid_data$date) | |
# Truncate real-time data to end Feb | |
cutoff_date <- as.Date("2020-03-10") | |
covid_data_rt <- covid_data |> filter(date<cutoff_date) | |
# Create plot | |
p_cases <- ggplot(covid_data_rt, aes(x = date, y = cases)) + | |
geom_col(fill = "blue") + | |
labs(title = "Daily Cases Over Time", x = "Date", y = "Cases") + | |
theme_minimal() | |
p_deaths <- ggplot(covid_data_rt, aes(x = date, y = deaths)) + | |
geom_col(fill = "orange") + | |
labs(title = "Daily Deaths Over Time", x = "Date", y = "Deaths") + | |
theme_minimal() | |
grid.arrange(p_cases, p_deaths, nrow = 2) | |
# Get onset-to-death distribution | |
onset_to_death_period_in <- | |
epiparameter::epiparameter_db( | |
disease = "covid", | |
epi_name = "onset to death", | |
single_epiparameter = TRUE | |
) | |
# Calculate CFR and 95% CI | |
cfr_estimate <- cfr_static( | |
data = covid_data_rt, | |
delay_density = function(x) density(onset_to_death_period_in, x) | |
) | |
print(cfr_estimate) | |
# Add forward dates to match full dataset | |
last_date <- max(covid_data_rt$date) | |
future_dates <- seq(last_date + 1, max(covid_data$date), by = "day") | |
blank_data <- data.frame( | |
date = future_dates, | |
cases = 0, | |
deaths = 0 | |
) | |
# Combine original and blank data | |
covid_data_future <- rbind(covid_data_rt, blank_data) | |
# Estimated outcomes | |
est_deaths <- estimate_outcomes( | |
data = covid_data_future, | |
delay_density = function(x) density(onset_to_death_period_in, x) | |
) | |
# Calculate cumulative outcomes | |
est_deaths <- est_deaths |> mutate(cumulative_outcomes = cumsum(estimated_outcomes * cfr_estimate$severity_estimate), | |
cumulative_outcomes_lower = cumsum(estimated_outcomes * cfr_estimate$severity_low), | |
cumulative_outcomes_upper = cumsum(estimated_outcomes * cfr_estimate$severity_high)) | |
# Calculate cumulative deaths at end of outbreak (i.e. not in real-time) | |
covid_data <- covid_data |> mutate(cumulative_deaths = cumsum(deaths)) | |
est_deaths$cumulative_deaths_final <- covid_data$cumulative_deaths | |
# Create forward plot | |
ggplot(est_deaths, aes(x = date)) + | |
geom_col(aes(y = cumulative_deaths_final), fill = "orange") + | |
geom_ribbon(aes(ymin = cumulative_outcomes_lower, ymax = cumulative_outcomes_upper), | |
fill = "grey70", alpha = 0.4) + | |
geom_line(aes(y = cumulative_outcomes), color = "black", size = 1) + | |
labs(title = "Expected Cumulative Deaths Over Time (black) and Actual Deaths (orange", | |
x = "Date", y = "Count") + | |
geom_vline(xintercept = as.numeric(cutoff_date), linetype = "dashed", color = "black") + | |
theme_minimal() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment