Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active February 13, 2025 03:31
Show Gist options
  • Save USMortality/7be69b3d8087c7e67139a770e3f1fabf to your computer and use it in GitHub Desktop.
Save USMortality/7be69b3d8087c7e67139a770e3f1fabf to your computer and use it in GitHub Desktop.
Excess CMR vs Excess ASMR vs. Excess ASD (%) [Germany]
library(readr)
library(tidyr)
library(ggplot2)
library(dplyr)
library(stringr)
library(scales)
library(lubridate)
library(tsibble)
source(paste0(
"https://gist.githubusercontent.com/USMortality/",
"cbb314e8e29b2f5b5578483504dd0d9f/raw/standard_population.r"
))
sf <- 2
width <- 1080 * sf
height <- 1920 / 4 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
# Settings
per_factor <- 100000
bl_start_year <- 2017
bl_end_year <- 2019
obs_start_year <- 2020
obs_end_year <- 2024
# Deaths
deaths <- read_csv("https://s3.mortality.watch/data/deaths/deu/deaths.csv") |>
filter(jurisdiction == "Deutschland")
# Population
parse_age_groups <- function(df) {
df$age_group <- sub("unter 1 Jahr", "0", df$age_group)
df$age_group <- sub("unter 15 Jahre", "0-14", df$age_group)
df$age_group <- sub("50 Jahre und mehr", "50+", df$age_group)
df$age_group <- sub("85 Jahre und mehr", "85+", df$age_group)
df$age_group <- sub("Insgesamt", "all", df$age_group)
df$age_group <- sub("Alter unbekannt", NA, df$age_group)
df$age_group <- sub("-Jährige", "", df$age_group)
df$age_group <- sub("100 Jahre und mehr", "100+", df$age_group)
df
}
# Population
pop <- read_delim(
"https://apify.mortality.watch/destatis-genesis/12411-0005.csv.gz",
delim = ";", skip = 6
)
# Cleanup
names(pop)[1] <- "age_group"
names(pop)[-1] <- sub(".*(.{4})$", "\\1", names(pop)[-1])
pop <- pop |>
filter(!is.na(`2000`)) |>
parse_age_groups()
pop <- pop |>
pivot_longer(
cols = !c("age_group"), names_to = "year", values_to = "population"
)
pop$year <- as.integer(pop$year)
pop$population <- as.integer(pop$population)
# Summarize deaths data and align age groups using case_when
# Align and summarize deaths data, consolidating 85+ groups
df_deaths <- deaths %>%
mutate(age_group = case_when(
age_group %in% c("85-89", "90-94", "95+", "Insgesamt") ~ "85+",
TRUE ~ age_group # Keep other age groups as they are
)) %>%
group_by(year, week, age_group) %>%
summarize(deaths = sum(deaths), .groups = "drop")
df_deaths_all <- df_deaths |>
group_by(year, week) |>
summarize(deaths = sum(deaths))
df_deaths_all$age_group <- "all"
df_deaths <- rbind(df_deaths, df_deaths_all)
# Summarize population data (no need to modify 85+ since it already exists)
df_pop <- pop %>%
mutate(age_group = case_when(
as.integer(age_group) < 30 ~ "0-29",
as.integer(age_group) >= 30 & as.integer(age_group) < 35 ~ "30-34",
as.integer(age_group) >= 35 & as.integer(age_group) < 40 ~ "35-39",
as.integer(age_group) >= 40 & as.integer(age_group) < 45 ~ "40-44",
as.integer(age_group) >= 45 & as.integer(age_group) < 50 ~ "45-49",
as.integer(age_group) >= 50 & as.integer(age_group) < 55 ~ "50-54",
as.integer(age_group) >= 55 & as.integer(age_group) < 60 ~ "55-59",
as.integer(age_group) >= 60 & as.integer(age_group) < 65 ~ "60-64",
as.integer(age_group) >= 65 & as.integer(age_group) < 70 ~ "65-69",
as.integer(age_group) >= 70 & as.integer(age_group) < 75 ~ "70-74",
as.integer(age_group) >= 75 & as.integer(age_group) < 80 ~ "75-79",
as.integer(age_group) >= 80 & as.integer(age_group) < 85 ~ "80-84",
TRUE ~ age_group # Keep 85+ and other existing groups
)) %>%
group_by(year, age_group) %>%
summarize(population = sum(population), .groups = "drop")
# Check if the age groups match between the two datasets
stopifnot(identical(
sort(unique(df_deaths$age_group)),
sort(unique(df_pop$age_group))
))
df <- df_deaths |>
inner_join(df_pop, by = join_by("year", "age_group")) |>
mutate(
date = make_yearweek(year, week),
is_senior = case_when(
age_group %in% c("65-69", "70-74", "75-79", "80-84", "85+") ~ TRUE,
.default = FALSE
)
) |>
filter(year %in% bl_start_year:obs_end_year, week <= 52)
# 1) Crude Mortality Rate (CMR)
df_cmr <- df |>
filter(age_group != "NS") |>
mutate(age_group = case_when(
!is_senior & age_group != "all" ~ "0-64",
is_senior & age_group != "all" ~ "65+",
.default = age_group
)) |>
group_by(date, age_group) |>
summarize(
deaths = sum(deaths),
population = sum(population),
.groups = "drop"
)
cmr <- df_cmr |>
mutate(cmr = deaths / population * per_factor) |>
group_by(age_group, week(date)) |>
mutate(cmr_bl = mean(cmr[year(date) %in% bl_start_year:bl_end_year])) |>
ungroup() |>
select(-`week(date)`)
stopifnot(nrow(cmr |> filter(is.na(cmr))) == 0)
stopifnot(nrow(cmr |> filter(is.na(cmr_bl))) == 0)
# 2) Age-Standardized Mortality Rate (ASMR)
calculate_asmr_for_group <- function(df_group, age_group_label) {
std_pop <- get_std_pop_weights(unique(df_group$age_group), usa2000)
std_pop$weight <- std_pop$weight / sum(std_pop$weight) # Scale to 1
df_group |>
inner_join(std_pop, by = join_by(age_group)) |>
mutate(cmr = deaths / population * 100000) |>
group_by(year, week) |>
summarize(asmr = sum(cmr * weight), .groups = "drop") |>
mutate(date = make_yearweek(year, week)) |>
group_by(week) |>
mutate(
asmr_bl = mean(asmr[year %in% bl_start_year:bl_end_year]),
age_group = age_group_label
) |>
ungroup()
}
# Calculate ASMR for all, 0-64, and 65+ age groups
asmr_all <- calculate_asmr_for_group(
df |> filter(!age_group %in% c("NS", "all")), "all"
)
asmr_jr <- calculate_asmr_for_group(
df |> filter(!is_senior, !age_group %in% c("NS", "all")), "0-64"
)
asmr_sr <- calculate_asmr_for_group(
df |> filter(is_senior, !age_group %in% c("NS", "all")), "65+"
)
asmr <- bind_rows(asmr_all, asmr_jr, asmr_sr) |> arrange(date)
stopifnot(nrow(asmr |> filter(is.na(asmr))) == 0)
stopifnot(nrow(asmr |> filter(is.na(asmr_bl))) == 0)
# 3) Calculate Ages-Standardized Deaths (ASD)
calculate_asd_for_group <- function(df2, age_group_label) {
df2 |>
group_by(age_group, week) |>
summarize(
asd_bl = mean(
if_else(
year <= bl_end_year & population > 0,
deaths / population * per_factor,
NA
),
na.rm = TRUE
),
.groups = "drop"
) |>
inner_join(
df |> filter(year(date) >= bl_start_year, year(date) <= obs_end_year),
by = c("age_group", "week")
) |>
mutate(asd_bl = asd_bl * population / per_factor) |>
group_by(date) |>
summarize(
asd = sum(deaths, na.rm = TRUE),
asd_bl = sum(asd_bl, na.rm = TRUE),
.groups = "drop"
) |>
mutate(age_group = age_group_label)
}
# Calculate ASD for all, 0-64, and 65+ age groups
asd_all <- calculate_asd_for_group(
df |> filter(!age_group %in% c("NS", "all")), "all"
)
asd_jr <- calculate_asd_for_group(
df |> filter(!is_senior, !age_group %in% c("NS", "all")), "0-64"
)
asd_sr <- calculate_asd_for_group(
df |> filter(is_senior, !age_group %in% c("NS", "all")), "65+"
)
asd <- bind_rows(asd_all, asd_jr, asd_sr) |> arrange(date)
# Calculate Pandemic Totals
result <- cmr |>
inner_join(asmr, join_by(date, age_group)) |>
inner_join(asd, join_by(date, age_group))
result$year <- as.character(result$year)
pandemic <- result |>
filter(year %in% obs_start_year:obs_end_year) |>
group_by(age_group) |>
summarise(across(
c(deaths, cmr, cmr_bl, asmr, asmr_bl, asd, asd_bl),
sum
), .groups = "drop") |>
mutate(year = "2020-2023", population = NA)
result <- bind_rows(result, pandemic)
# Calculate Excess
result <- result |>
mutate(
cmrx = cmr - cmr_bl, cmrx_p = cmrx / cmr_bl,
asmrx = asmr - asmr_bl, asmrx_p = asmrx / asmr_bl,
asdx = asd - asd_bl, asdx_p = asdx / asd_bl
) |>
relocate(cmrx, cmrx_p, .after = cmr_bl) |>
relocate(asmrx, asmrx_p, .after = asmr_bl) |>
relocate(asdx, asdx_p, .after = asd_bl)
stopifnot(nrow(result |> filter(deaths > 0, is.na(cmrx_p))) == 0)
stopifnot(nrow(result |> filter(deaths > 0, is.na(asmrx_p))) == 0)
stopifnot(nrow(result |> filter(deaths > 0, is.na(asdx_p))) == 0)
chart <- result |>
filter(!is.na(date)) |>
ggplot(aes(x = date)) +
geom_line(aes(y = cmrx_p, color = "CMR"), linewidth = .4) +
geom_line(aes(y = asmrx_p, color = "ASMR"), linewidth = .4) +
geom_line(aes(y = asdx_p, color = "ASD"), linewidth = .4) +
labs(
title = "Excess CMR vs Excess ASMR vs. Excess ASD (%) [Germany]",
subtitle = "Mean 2017-'19 Baseline",
y = "Excess Mortality (%)",
x = "Month of Year",
color = "Mortality Type"
) +
scale_y_continuous(labels = scales::percent) +
theme_bw() +
facet_wrap(vars(age_group))
ggplot2::ggsave(
filename = "chart1.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# Correlation
result %>%
group_by(age_group) %>%
summarise(
cor_cmr_asmr = cor(cmrx_p, asmrx_p, use = "complete.obs"),
cor_cmr_asxd = cor(cmrx_p, asdx_p, use = "complete.obs"),
cor_asmr_asxd = cor(asmrx_p, asdx_p, use = "complete.obs")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment