Skip to content

Instantly share code, notes, and snippets.

@adamkucharski
Created April 8, 2025 09:15
Show Gist options
  • Save adamkucharski/8444fd8cfb3ddb0b01137a3ba5388941 to your computer and use it in GitHub Desktop.
Save adamkucharski/8444fd8cfb3ddb0b01137a3ba5388941 to your computer and use it in GitHub Desktop.
CFR estimation with diamond_princess data
# Correcting for real-time CFR bias -------------------------------------------------
# This script builds on the theory outlined in this vignette:
# https://epiverse-trace.github.io/cfr/articles/estimate_from_individual_data.html
# Load packages
library(cfr)
library(dplyr)
library(ggplot2)
library(gridExtra)
setwd("~/Dropbox/LSHTM/TRACE_project/2025_02_workshop/")
# Load data and parameters --------------------------------------------
# Load Diamond Princess Data
covid_data <- read.csv("diamond_princess.csv")
covid_data$date <- as.Date(covid_data$date)
# Truncate real-time data to end Feb
cutoff_date <- as.Date("2020-03-10")
covid_data_rt <- covid_data |> filter(date<cutoff_date)
# Create plot
p_cases <- ggplot(covid_data_rt, aes(x = date, y = cases)) +
geom_col(fill = "blue") +
labs(title = "Daily Cases Over Time", x = "Date", y = "Cases") +
theme_minimal()
p_deaths <- ggplot(covid_data_rt, aes(x = date, y = deaths)) +
geom_col(fill = "orange") +
labs(title = "Daily Deaths Over Time", x = "Date", y = "Deaths") +
theme_minimal()
grid.arrange(p_cases, p_deaths, nrow = 2)
# Get onset-to-death distribution
onset_to_death_period_in <-
epiparameter::epiparameter_db(
disease = "covid",
epi_name = "onset to death",
single_epiparameter = TRUE
)
# Calculate CFR and 95% CI
cfr_estimate <- cfr_static(
data = covid_data_rt,
delay_density = function(x) density(onset_to_death_period_in, x)
)
print(cfr_estimate)
# Add forward dates to match full dataset
last_date <- max(covid_data_rt$date)
future_dates <- seq(last_date + 1, max(covid_data$date), by = "day")
blank_data <- data.frame(
date = future_dates,
cases = 0,
deaths = 0
)
# Combine original and blank data
covid_data_future <- rbind(covid_data_rt, blank_data)
# Estimated outcomes
est_deaths <- estimate_outcomes(
data = covid_data_future,
delay_density = function(x) density(onset_to_death_period_in, x)
)
# Calculate cumulative outcomes
est_deaths <- est_deaths |> mutate(cumulative_outcomes = cumsum(estimated_outcomes * cfr_estimate$severity_estimate),
cumulative_outcomes_lower = cumsum(estimated_outcomes * cfr_estimate$severity_low),
cumulative_outcomes_upper = cumsum(estimated_outcomes * cfr_estimate$severity_high))
# Calculate cumulative deaths at end of outbreak (i.e. not in real-time)
covid_data <- covid_data |> mutate(cumulative_deaths = cumsum(deaths))
est_deaths$cumulative_deaths_final <- covid_data$cumulative_deaths
# Create forward plot
ggplot(est_deaths, aes(x = date)) +
geom_col(aes(y = cumulative_deaths_final), fill = "orange") +
geom_ribbon(aes(ymin = cumulative_outcomes_lower, ymax = cumulative_outcomes_upper),
fill = "grey70", alpha = 0.4) +
geom_line(aes(y = cumulative_outcomes), color = "black", size = 1) +
labs(title = "Expected Cumulative Deaths Over Time (black) and Actual Deaths (orange",
x = "Date", y = "Count") +
geom_vline(xintercept = as.numeric(cutoff_date), linetype = "dashed", color = "black") +
theme_minimal()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment