Last active
July 20, 2025 12:46
-
-
Save USMortality/e3c12aa29d4fa20ac1ff8b04df5779d4 to your computer and use it in GitHub Desktop.
Excess Deaths vs VAERS Reported Deaths [USA]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(tsibble) | |
library(lubridate) | |
library(scales) | |
library(fable) | |
library(fabletools) | |
# Set up high-res plotting | |
width <- 600 * 2 | |
height <- 335 * 2 | |
options(vsc.dev.args = list(width = width, height = height, res = 72 * 2)) | |
deaths <- read_csv( | |
"https://s3.mortality.watch/data/mortality/USA/monthly.csv" | |
) |> | |
as_tibble() |> | |
select(`date`, `deaths`) |> | |
setNames(c("date", "deaths")) |> | |
mutate(date = yearmonth(date), deaths = as.integer(deaths)) |> | |
filter(date >= make_yearmonth(2010, 1), date <= make_yearmonth(2023, 12)) | |
# Read and process VAERS data | |
vaers <- read_delim( | |
"https://apify.mortality.watch/cdc-wonder/vaers-deaths-month.txt", | |
delim = "\t", col_types = cols(.default = "c") | |
) |> | |
select(`Month Died Code`, `Events Reported`) |> | |
setNames(c("date", "deaths")) |> | |
filter(!is.na(date), !is.na(deaths)) |> | |
mutate( | |
date = yearmonth(as.Date(paste0(date, "/01"), format = "%Y/%m/%d")), | |
deaths = as.numeric(deaths) | |
) | |
# Join datasets and filter to 2020+ | |
df <- vaers |> | |
right_join(deaths, by = join_by(date == date)) |> | |
setNames(c("date", "vaers_deaths", "all_deaths")) |> | |
filter(date >= make_yearmonth(2020, 1)) | |
# Calculate scaling factor for dual axis | |
scale_factor <- max(df$all_deaths, na.rm = TRUE) / | |
max(df$vaers_deaths, na.rm = TRUE) / 4 | |
# Create the dual-axis plot | |
chart <- ggplot(df, aes(x = date)) + | |
# All-cause deaths (left axis) | |
geom_line(aes(y = all_deaths), | |
color = "blue", size = 1.2, alpha = 0.8 | |
) + | |
# VAERS deaths (scaled for right axis) | |
geom_line(aes(y = vaers_deaths * scale_factor), | |
color = "red", size = 1.2, alpha = 0.8 | |
) + | |
# Left y-axis | |
scale_y_continuous( | |
name = "All-Cause Deaths", | |
labels = comma_format(), | |
sec.axis = sec_axis( | |
trans = ~ . / scale_factor, | |
name = "VAERS Deaths", | |
labels = comma_format() | |
) | |
) + | |
scale_x_yearmonth() + | |
# Labels and theme | |
labs( | |
title = "All-Cause Deaths vs VAERS Reported Deaths", | |
subtitle = "United States, January 2020 onwards", | |
caption = paste0( | |
"VAERS deaths are suspected cases reported to the Vaccine Adverse Event ", | |
"Reporting System\nSource: CDC WONDER & VAERS | @USMortality" | |
), | |
x = NULL | |
) + | |
theme_minimal() + | |
theme( | |
panel.background = element_rect(fill = "white", color = NA), | |
plot.background = element_rect(fill = "white", color = NA), | |
axis.text.x = element_text(angle = 45, hjust = 1), | |
axis.text.y.left = element_text(color = "blue"), | |
axis.title.y.left = element_text(color = "blue"), | |
axis.text.y.right = element_text(color = "red"), | |
axis.title.y.right = element_text(color = "red") | |
) | |
ggplot2::ggsave( | |
filename = "chart1.png", plot = chart, width = width, height = height, | |
units = "px", dpi = 72 * 2, device = grDevices::png, type = c("cairo") | |
) | |
deaths_ts <- deaths |> as_tsibble() | |
# Fit fable models for baseline (2010-2019) with seasonal linear trend | |
baseline_data <- deaths_ts |> filter( | |
date >= make_yearmonth(2010, 1), | |
date <= make_yearmonth(2019, 12) | |
) | |
# Fit TSLM model with just linear trend (no seasonality) | |
fit <- baseline_data |> model(TSLM(deaths ~ trend())) | |
# Forecast for 2020+ period | |
forecast_period <- deaths_ts |> | |
filter(date >= make_yearmonth(2020, 1)) |> | |
select(date) | |
# Generate forecasts for the 2020+ period | |
forecasts <- fit |> forecast(new_data = forecast_period) | |
# Extract point forecasts and calculate excess deaths | |
excess_data <- deaths_ts |> | |
filter(date >= make_yearmonth(2020, 1)) |> | |
left_join( | |
forecasts |> | |
as_tibble() |> | |
select(date, .mean) |> | |
rename(expected_deaths = .mean), | |
by = "date" | |
) |> | |
mutate(excess_deaths = deaths - expected_deaths) | |
# Join with VAERS data for comparison | |
excess_vaers <- vaers |> | |
filter(date >= make_yearmonth(2020, 1)) |> | |
left_join(excess_data |> select(date, excess_deaths), by = "date") |> | |
filter(!is.na(excess_deaths)) | |
# Calculate scaling factor for dual axis | |
scale_factor_excess <- max(excess_vaers$excess_deaths, na.rm = TRUE) / | |
max(excess_vaers$deaths, na.rm = TRUE) / 4 | |
# Create the excess deaths vs VAERS deaths chart | |
chart2 <- ggplot(excess_vaers, aes(x = date)) + | |
# Excess deaths (left axis) | |
geom_line(aes(y = excess_deaths), | |
color = "blue", size = 1.2, alpha = 0.8 | |
) + | |
# VAERS deaths (scaled for right axis) | |
geom_line(aes(y = deaths * scale_factor_excess), | |
color = "red", size = 1.2, alpha = 0.8 | |
) + | |
# Add zero line for reference | |
geom_hline( | |
yintercept = 0, linetype = "dashed", color = "gray50", alpha = 0.7 | |
) + | |
# Left y-axis | |
scale_y_continuous( | |
name = "Excess Deaths (vs 2010-2019 linear trend)", | |
labels = comma_format(), | |
sec.axis = sec_axis( | |
trans = ~ . / scale_factor_excess, | |
name = "VAERS Deaths", | |
labels = comma_format() | |
) | |
) + | |
scale_x_yearmonth() + | |
# Labels and theme | |
labs( | |
title = "Excess Deaths vs VAERS Reported Deaths", | |
subtitle = paste0( | |
"United States, January 2020 onwards ", | |
"(Excess calculated vs 2010-2019 linear trend)" | |
), | |
caption = paste0( | |
"Excess deaths calculated as actual deaths minus 2010-2019 linear ", | |
"regression baseline\nVAERS deaths are suspected cases reported to the ", | |
"Vaccine Adverse Event Reporting System\nSource: CDC WONDER & ", | |
"VAERS | @USMortality" | |
), | |
x = NULL | |
) + | |
theme_minimal() + | |
theme( | |
panel.background = element_rect(fill = "white", color = NA), | |
plot.background = element_rect(fill = "white", color = NA), | |
axis.text.x = element_text(angle = 45, hjust = 1), | |
axis.text.y.left = element_text(color = "blue"), | |
axis.title.y.left = element_text(color = "blue"), | |
axis.text.y.right = element_text(color = "red"), | |
axis.title.y.right = element_text(color = "red") | |
) | |
# Save the excess deaths chart | |
ggplot2::ggsave( | |
filename = "chart2.png", plot = chart2, width = width, height = height, | |
units = "px", dpi = 72 * 2, device = grDevices::png, type = c("cairo") | |
) | |
# Print summary statistics | |
cat("\n=== Excess Deaths Analysis ===\n") | |
cat("Baseline period: 2010-2019 (linear regression)\n") | |
cat("Analysis period: 2020 onwards\n") | |
cat( | |
"Total excess deaths (2020+):", | |
format(round(sum(excess_data$excess_deaths, na.rm = TRUE)), big.mark = ","), | |
"\n" | |
) | |
cat( | |
"Average monthly excess deaths:", | |
format(round(mean(excess_data$excess_deaths, na.rm = TRUE)), big.mark = ","), | |
"\n" | |
) | |
cat( | |
"Peak monthly excess deaths:", | |
format(round(max(excess_data$excess_deaths, na.rm = TRUE)), big.mark = ","), | |
"\n" | |
) | |
cat( | |
"Total VAERS deaths (2020+):", | |
format(sum(excess_vaers$deaths, na.rm = TRUE), big.mark = ","), "\n" | |
) | |
# source("~/Desktop/1.r") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment