Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active October 25, 2024 00:11
Show Gist options
  • Save USMortality/12fa6301e6ad686aa66b5a4e72728b1a to your computer and use it in GitHub Desktop.
Save USMortality/12fa6301e6ad686aa66b5a4e72728b1a to your computer and use it in GitHub Desktop.
Excess Mortality vs COVID-19 Vaccination [USA]
library("readr")
library("dplyr")
library("ggplot2")
library("scales")
sf <- 2
width <- 600 * sf
height <- 335 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
df <- read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")
ts <- df |>
filter(iso_code == "USA") |>
select(date, excess_mortality, new_vaccinations) |>
filter(!is.na(excess_mortality))
first_vaxx <- ts |>
filter(!is.na(new_vaccinations)) |>
head(1)
ts <- ts |> filter(date >= first_vaxx$date)
# Calculate the correlation
correlation <- cor(ts$excess_mortality, ts$new_vaccinations, use = "complete.obs")
# Create the plot
chart <- ggplot(ts, aes(x = date)) +
geom_line(aes(y = excess_mortality), color = "red") +
geom_line(aes(y = new_vaccinations / 50000), color = "blue") +
scale_y_continuous(
name = "Excess Mortality",
labels = scales::percent_format(scale = 1),
sec.axis = sec_axis(~ . * 50000,
name = "New Vaccinations (per 50k)",
labels = scales::comma
)
) +
labs(
title = "Excess Mortality vs COVID-19 Vaccination [USA]",
subtitle = "Source: OWID",
caption = paste("Correlation:", round(correlation, 2)),
color = "Legend"
) +
theme_minimal() +
theme(
axis.title.y.right = element_text(color = "blue"),
axis.title.y.left = element_text(color = "red"),
legend.position = "top"
)
ggplot2::ggsave(
filename = "chart1.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# Loop through shifts from 0 to 90 days, to find max correlation.
max_correlation <- 0
best_shift <- 0
for (shift in 0:90) {
ts$excess_mortality_shifted <- dplyr::lead(ts$excess_mortality, shift)
correlation_shifted <- cor(ts$excess_mortality_shifted, ts$new_vaccinations, use = "complete.obs")
if (correlation_shifted > max_correlation) {
max_correlation <- correlation_shifted
best_shift <- shift
}
}
# Output the maximum correlation and the shift
max_correlation
ts$excess_mortality_shifted <- dplyr::lead(ts$excess_mortality, best_shift)
# Create the plot
chart <- ggplot(ts, aes(x = date)) +
geom_line(aes(y = excess_mortality_shifted), color = "red") +
geom_line(aes(y = new_vaccinations / 50000), color = "blue") +
scale_y_continuous(
name = "Excess Mortality",
labels = scales::percent_format(scale = 1),
sec.axis = sec_axis(~ . * 50000,
name = "New Vaccinations (per 50k)",
labels = scales::comma
)
) +
labs(
title = paste0(best_shift, "-Days Shifted Excess Mortality vs COVID-19 Vaccination [USA]"),
subtitle = "Source: OWID",
caption = paste0(
"Correlation (", best_shift, "-day shift): ",
round(max_correlation, 2)
),
color = "Legend"
) +
theme_minimal() +
theme(
axis.title.y.right = element_text(color = "blue"),
axis.title.y.left = element_text(color = "red"),
legend.position = "top"
)
ggplot2::ggsave(
filename = "chart2.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
@USMortality
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment