Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active July 20, 2025 12:46
Show Gist options
  • Save USMortality/e3c12aa29d4fa20ac1ff8b04df5779d4 to your computer and use it in GitHub Desktop.
Save USMortality/e3c12aa29d4fa20ac1ff8b04df5779d4 to your computer and use it in GitHub Desktop.
Excess Deaths vs VAERS Reported Deaths [USA]
library(tidyverse)
library(tsibble)
library(lubridate)
library(scales)
library(fable)
library(fabletools)
# Set up high-res plotting
width <- 600 * 2
height <- 335 * 2
options(vsc.dev.args = list(width = width, height = height, res = 72 * 2))
deaths <- read_csv(
"https://s3.mortality.watch/data/mortality/USA/monthly.csv"
) |>
as_tibble() |>
select(`date`, `deaths`) |>
setNames(c("date", "deaths")) |>
mutate(date = yearmonth(date), deaths = as.integer(deaths)) |>
filter(date >= make_yearmonth(2010, 1), date <= make_yearmonth(2023, 12))
# Read and process VAERS data
vaers <- read_delim(
"https://apify.mortality.watch/cdc-wonder/vaers-deaths-month.txt",
delim = "\t", col_types = cols(.default = "c")
) |>
select(`Month Died Code`, `Events Reported`) |>
setNames(c("date", "deaths")) |>
filter(!is.na(date), !is.na(deaths)) |>
mutate(
date = yearmonth(as.Date(paste0(date, "/01"), format = "%Y/%m/%d")),
deaths = as.numeric(deaths)
)
# Join datasets and filter to 2020+
df <- vaers |>
right_join(deaths, by = join_by(date == date)) |>
setNames(c("date", "vaers_deaths", "all_deaths")) |>
filter(date >= make_yearmonth(2020, 1))
# Calculate scaling factor for dual axis
scale_factor <- max(df$all_deaths, na.rm = TRUE) /
max(df$vaers_deaths, na.rm = TRUE) / 4
# Create the dual-axis plot
chart <- ggplot(df, aes(x = date)) +
# All-cause deaths (left axis)
geom_line(aes(y = all_deaths),
color = "blue", size = 1.2, alpha = 0.8
) +
# VAERS deaths (scaled for right axis)
geom_line(aes(y = vaers_deaths * scale_factor),
color = "red", size = 1.2, alpha = 0.8
) +
# Left y-axis
scale_y_continuous(
name = "All-Cause Deaths",
labels = comma_format(),
sec.axis = sec_axis(
trans = ~ . / scale_factor,
name = "VAERS Deaths",
labels = comma_format()
)
) +
scale_x_yearmonth() +
# Labels and theme
labs(
title = "All-Cause Deaths vs VAERS Reported Deaths",
subtitle = "United States, January 2020 onwards",
caption = paste0(
"VAERS deaths are suspected cases reported to the Vaccine Adverse Event ",
"Reporting System\nSource: CDC WONDER & VAERS | @USMortality"
),
x = NULL
) +
theme_minimal() +
theme(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y.left = element_text(color = "blue"),
axis.title.y.left = element_text(color = "blue"),
axis.text.y.right = element_text(color = "red"),
axis.title.y.right = element_text(color = "red")
)
ggplot2::ggsave(
filename = "chart1.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * 2, device = grDevices::png, type = c("cairo")
)
deaths_ts <- deaths |> as_tsibble()
# Fit fable models for baseline (2010-2019) with seasonal linear trend
baseline_data <- deaths_ts |> filter(
date >= make_yearmonth(2010, 1),
date <= make_yearmonth(2019, 12)
)
# Fit TSLM model with just linear trend (no seasonality)
fit <- baseline_data |> model(TSLM(deaths ~ trend()))
# Forecast for 2020+ period
forecast_period <- deaths_ts |>
filter(date >= make_yearmonth(2020, 1)) |>
select(date)
# Generate forecasts for the 2020+ period
forecasts <- fit |> forecast(new_data = forecast_period)
# Extract point forecasts and calculate excess deaths
excess_data <- deaths_ts |>
filter(date >= make_yearmonth(2020, 1)) |>
left_join(
forecasts |>
as_tibble() |>
select(date, .mean) |>
rename(expected_deaths = .mean),
by = "date"
) |>
mutate(excess_deaths = deaths - expected_deaths)
# Join with VAERS data for comparison
excess_vaers <- vaers |>
filter(date >= make_yearmonth(2020, 1)) |>
left_join(excess_data |> select(date, excess_deaths), by = "date") |>
filter(!is.na(excess_deaths))
# Calculate scaling factor for dual axis
scale_factor_excess <- max(excess_vaers$excess_deaths, na.rm = TRUE) /
max(excess_vaers$deaths, na.rm = TRUE) / 4
# Create the excess deaths vs VAERS deaths chart
chart2 <- ggplot(excess_vaers, aes(x = date)) +
# Excess deaths (left axis)
geom_line(aes(y = excess_deaths),
color = "blue", size = 1.2, alpha = 0.8
) +
# VAERS deaths (scaled for right axis)
geom_line(aes(y = deaths * scale_factor_excess),
color = "red", size = 1.2, alpha = 0.8
) +
# Add zero line for reference
geom_hline(
yintercept = 0, linetype = "dashed", color = "gray50", alpha = 0.7
) +
# Left y-axis
scale_y_continuous(
name = "Excess Deaths (vs 2010-2019 linear trend)",
labels = comma_format(),
sec.axis = sec_axis(
trans = ~ . / scale_factor_excess,
name = "VAERS Deaths",
labels = comma_format()
)
) +
scale_x_yearmonth() +
# Labels and theme
labs(
title = "Excess Deaths vs VAERS Reported Deaths",
subtitle = paste0(
"United States, January 2020 onwards ",
"(Excess calculated vs 2010-2019 linear trend)"
),
caption = paste0(
"Excess deaths calculated as actual deaths minus 2010-2019 linear ",
"regression baseline\nVAERS deaths are suspected cases reported to the ",
"Vaccine Adverse Event Reporting System\nSource: CDC WONDER & ",
"VAERS | @USMortality"
),
x = NULL
) +
theme_minimal() +
theme(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y.left = element_text(color = "blue"),
axis.title.y.left = element_text(color = "blue"),
axis.text.y.right = element_text(color = "red"),
axis.title.y.right = element_text(color = "red")
)
# Save the excess deaths chart
ggplot2::ggsave(
filename = "chart2.png", plot = chart2, width = width, height = height,
units = "px", dpi = 72 * 2, device = grDevices::png, type = c("cairo")
)
# Print summary statistics
cat("\n=== Excess Deaths Analysis ===\n")
cat("Baseline period: 2010-2019 (linear regression)\n")
cat("Analysis period: 2020 onwards\n")
cat(
"Total excess deaths (2020+):",
format(round(sum(excess_data$excess_deaths, na.rm = TRUE)), big.mark = ","),
"\n"
)
cat(
"Average monthly excess deaths:",
format(round(mean(excess_data$excess_deaths, na.rm = TRUE)), big.mark = ","),
"\n"
)
cat(
"Peak monthly excess deaths:",
format(round(max(excess_data$excess_deaths, na.rm = TRUE)), big.mark = ","),
"\n"
)
cat(
"Total VAERS deaths (2020+):",
format(sum(excess_vaers$deaths, na.rm = TRUE), big.mark = ","), "\n"
)
# source("~/Desktop/1.r")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment