Last active
February 13, 2025 03:31
-
-
Save USMortality/7be69b3d8087c7e67139a770e3f1fabf to your computer and use it in GitHub Desktop.
Excess CMR vs Excess ASMR vs. Excess ASD (%) [Germany]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(tidyr) | |
library(ggplot2) | |
library(dplyr) | |
library(stringr) | |
library(scales) | |
library(lubridate) | |
library(tsibble) | |
source(paste0( | |
"https://gist.githubusercontent.com/USMortality/", | |
"cbb314e8e29b2f5b5578483504dd0d9f/raw/standard_population.r" | |
)) | |
sf <- 2 | |
width <- 1080 * sf | |
height <- 1920 / 4 * sf | |
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf)) | |
# Settings | |
per_factor <- 100000 | |
bl_start_year <- 2017 | |
bl_end_year <- 2019 | |
obs_start_year <- 2020 | |
obs_end_year <- 2024 | |
# Deaths | |
deaths <- read_csv("https://s3.mortality.watch/data/deaths/deu/deaths.csv") |> | |
filter(jurisdiction == "Deutschland") | |
# Population | |
parse_age_groups <- function(df) { | |
df$age_group <- sub("unter 1 Jahr", "0", df$age_group) | |
df$age_group <- sub("unter 15 Jahre", "0-14", df$age_group) | |
df$age_group <- sub("50 Jahre und mehr", "50+", df$age_group) | |
df$age_group <- sub("85 Jahre und mehr", "85+", df$age_group) | |
df$age_group <- sub("Insgesamt", "all", df$age_group) | |
df$age_group <- sub("Alter unbekannt", NA, df$age_group) | |
df$age_group <- sub("-Jährige", "", df$age_group) | |
df$age_group <- sub("100 Jahre und mehr", "100+", df$age_group) | |
df | |
} | |
# Population | |
pop <- read_delim( | |
"https://apify.mortality.watch/destatis-genesis/12411-0005.csv.gz", | |
delim = ";", skip = 6 | |
) | |
# Cleanup | |
names(pop)[1] <- "age_group" | |
names(pop)[-1] <- sub(".*(.{4})$", "\\1", names(pop)[-1]) | |
pop <- pop |> | |
filter(!is.na(`2000`)) |> | |
parse_age_groups() | |
pop <- pop |> | |
pivot_longer( | |
cols = !c("age_group"), names_to = "year", values_to = "population" | |
) | |
pop$year <- as.integer(pop$year) | |
pop$population <- as.integer(pop$population) | |
# Summarize deaths data and align age groups using case_when | |
# Align and summarize deaths data, consolidating 85+ groups | |
df_deaths <- deaths %>% | |
mutate(age_group = case_when( | |
age_group %in% c("85-89", "90-94", "95+", "Insgesamt") ~ "85+", | |
TRUE ~ age_group # Keep other age groups as they are | |
)) %>% | |
group_by(year, week, age_group) %>% | |
summarize(deaths = sum(deaths), .groups = "drop") | |
df_deaths_all <- df_deaths |> | |
group_by(year, week) |> | |
summarize(deaths = sum(deaths)) | |
df_deaths_all$age_group <- "all" | |
df_deaths <- rbind(df_deaths, df_deaths_all) | |
# Summarize population data (no need to modify 85+ since it already exists) | |
df_pop <- pop %>% | |
mutate(age_group = case_when( | |
as.integer(age_group) < 30 ~ "0-29", | |
as.integer(age_group) >= 30 & as.integer(age_group) < 35 ~ "30-34", | |
as.integer(age_group) >= 35 & as.integer(age_group) < 40 ~ "35-39", | |
as.integer(age_group) >= 40 & as.integer(age_group) < 45 ~ "40-44", | |
as.integer(age_group) >= 45 & as.integer(age_group) < 50 ~ "45-49", | |
as.integer(age_group) >= 50 & as.integer(age_group) < 55 ~ "50-54", | |
as.integer(age_group) >= 55 & as.integer(age_group) < 60 ~ "55-59", | |
as.integer(age_group) >= 60 & as.integer(age_group) < 65 ~ "60-64", | |
as.integer(age_group) >= 65 & as.integer(age_group) < 70 ~ "65-69", | |
as.integer(age_group) >= 70 & as.integer(age_group) < 75 ~ "70-74", | |
as.integer(age_group) >= 75 & as.integer(age_group) < 80 ~ "75-79", | |
as.integer(age_group) >= 80 & as.integer(age_group) < 85 ~ "80-84", | |
TRUE ~ age_group # Keep 85+ and other existing groups | |
)) %>% | |
group_by(year, age_group) %>% | |
summarize(population = sum(population), .groups = "drop") | |
# Check if the age groups match between the two datasets | |
stopifnot(identical( | |
sort(unique(df_deaths$age_group)), | |
sort(unique(df_pop$age_group)) | |
)) | |
df <- df_deaths |> | |
inner_join(df_pop, by = join_by("year", "age_group")) |> | |
mutate( | |
date = make_yearweek(year, week), | |
is_senior = case_when( | |
age_group %in% c("65-69", "70-74", "75-79", "80-84", "85+") ~ TRUE, | |
.default = FALSE | |
) | |
) |> | |
filter(year %in% bl_start_year:obs_end_year, week <= 52) | |
# 1) Crude Mortality Rate (CMR) | |
df_cmr <- df |> | |
filter(age_group != "NS") |> | |
mutate(age_group = case_when( | |
!is_senior & age_group != "all" ~ "0-64", | |
is_senior & age_group != "all" ~ "65+", | |
.default = age_group | |
)) |> | |
group_by(date, age_group) |> | |
summarize( | |
deaths = sum(deaths), | |
population = sum(population), | |
.groups = "drop" | |
) | |
cmr <- df_cmr |> | |
mutate(cmr = deaths / population * per_factor) |> | |
group_by(age_group, week(date)) |> | |
mutate(cmr_bl = mean(cmr[year(date) %in% bl_start_year:bl_end_year])) |> | |
ungroup() |> | |
select(-`week(date)`) | |
stopifnot(nrow(cmr |> filter(is.na(cmr))) == 0) | |
stopifnot(nrow(cmr |> filter(is.na(cmr_bl))) == 0) | |
# 2) Age-Standardized Mortality Rate (ASMR) | |
calculate_asmr_for_group <- function(df_group, age_group_label) { | |
std_pop <- get_std_pop_weights(unique(df_group$age_group), usa2000) | |
std_pop$weight <- std_pop$weight / sum(std_pop$weight) # Scale to 1 | |
df_group |> | |
inner_join(std_pop, by = join_by(age_group)) |> | |
mutate(cmr = deaths / population * 100000) |> | |
group_by(year, week) |> | |
summarize(asmr = sum(cmr * weight), .groups = "drop") |> | |
mutate(date = make_yearweek(year, week)) |> | |
group_by(week) |> | |
mutate( | |
asmr_bl = mean(asmr[year %in% bl_start_year:bl_end_year]), | |
age_group = age_group_label | |
) |> | |
ungroup() | |
} | |
# Calculate ASMR for all, 0-64, and 65+ age groups | |
asmr_all <- calculate_asmr_for_group( | |
df |> filter(!age_group %in% c("NS", "all")), "all" | |
) | |
asmr_jr <- calculate_asmr_for_group( | |
df |> filter(!is_senior, !age_group %in% c("NS", "all")), "0-64" | |
) | |
asmr_sr <- calculate_asmr_for_group( | |
df |> filter(is_senior, !age_group %in% c("NS", "all")), "65+" | |
) | |
asmr <- bind_rows(asmr_all, asmr_jr, asmr_sr) |> arrange(date) | |
stopifnot(nrow(asmr |> filter(is.na(asmr))) == 0) | |
stopifnot(nrow(asmr |> filter(is.na(asmr_bl))) == 0) | |
# 3) Calculate Ages-Standardized Deaths (ASD) | |
calculate_asd_for_group <- function(df2, age_group_label) { | |
df2 |> | |
group_by(age_group, week) |> | |
summarize( | |
asd_bl = mean( | |
if_else( | |
year <= bl_end_year & population > 0, | |
deaths / population * per_factor, | |
NA | |
), | |
na.rm = TRUE | |
), | |
.groups = "drop" | |
) |> | |
inner_join( | |
df |> filter(year(date) >= bl_start_year, year(date) <= obs_end_year), | |
by = c("age_group", "week") | |
) |> | |
mutate(asd_bl = asd_bl * population / per_factor) |> | |
group_by(date) |> | |
summarize( | |
asd = sum(deaths, na.rm = TRUE), | |
asd_bl = sum(asd_bl, na.rm = TRUE), | |
.groups = "drop" | |
) |> | |
mutate(age_group = age_group_label) | |
} | |
# Calculate ASD for all, 0-64, and 65+ age groups | |
asd_all <- calculate_asd_for_group( | |
df |> filter(!age_group %in% c("NS", "all")), "all" | |
) | |
asd_jr <- calculate_asd_for_group( | |
df |> filter(!is_senior, !age_group %in% c("NS", "all")), "0-64" | |
) | |
asd_sr <- calculate_asd_for_group( | |
df |> filter(is_senior, !age_group %in% c("NS", "all")), "65+" | |
) | |
asd <- bind_rows(asd_all, asd_jr, asd_sr) |> arrange(date) | |
# Calculate Pandemic Totals | |
result <- cmr |> | |
inner_join(asmr, join_by(date, age_group)) |> | |
inner_join(asd, join_by(date, age_group)) | |
result$year <- as.character(result$year) | |
pandemic <- result |> | |
filter(year %in% obs_start_year:obs_end_year) |> | |
group_by(age_group) |> | |
summarise(across( | |
c(deaths, cmr, cmr_bl, asmr, asmr_bl, asd, asd_bl), | |
sum | |
), .groups = "drop") |> | |
mutate(year = "2020-2023", population = NA) | |
result <- bind_rows(result, pandemic) | |
# Calculate Excess | |
result <- result |> | |
mutate( | |
cmrx = cmr - cmr_bl, cmrx_p = cmrx / cmr_bl, | |
asmrx = asmr - asmr_bl, asmrx_p = asmrx / asmr_bl, | |
asdx = asd - asd_bl, asdx_p = asdx / asd_bl | |
) |> | |
relocate(cmrx, cmrx_p, .after = cmr_bl) |> | |
relocate(asmrx, asmrx_p, .after = asmr_bl) |> | |
relocate(asdx, asdx_p, .after = asd_bl) | |
stopifnot(nrow(result |> filter(deaths > 0, is.na(cmrx_p))) == 0) | |
stopifnot(nrow(result |> filter(deaths > 0, is.na(asmrx_p))) == 0) | |
stopifnot(nrow(result |> filter(deaths > 0, is.na(asdx_p))) == 0) | |
chart <- result |> | |
filter(!is.na(date)) |> | |
ggplot(aes(x = date)) + | |
geom_line(aes(y = cmrx_p, color = "CMR"), linewidth = .4) + | |
geom_line(aes(y = asmrx_p, color = "ASMR"), linewidth = .4) + | |
geom_line(aes(y = asdx_p, color = "ASD"), linewidth = .4) + | |
labs( | |
title = "Excess CMR vs Excess ASMR vs. Excess ASD (%) [Germany]", | |
subtitle = "Mean 2017-'19 Baseline", | |
y = "Excess Mortality (%)", | |
x = "Month of Year", | |
color = "Mortality Type" | |
) + | |
scale_y_continuous(labels = scales::percent) + | |
theme_bw() + | |
facet_wrap(vars(age_group)) | |
ggplot2::ggsave( | |
filename = "chart1.png", plot = chart, width = width, height = height, | |
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo") | |
) | |
# Correlation | |
result %>% | |
group_by(age_group) %>% | |
summarise( | |
cor_cmr_asmr = cor(cmrx_p, asmrx_p, use = "complete.obs"), | |
cor_cmr_asxd = cor(cmrx_p, asdx_p, use = "complete.obs"), | |
cor_asmr_asxd = cor(asmrx_p, asdx_p, use = "complete.obs") | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment