Last active
February 27, 2025 03:13
-
-
Save USMortality/d1f3b7d1785ef9b1f42358dcba232af4 to your computer and use it in GitHub Desktop.
Age-Standardized Mortality Rate (ASMR) [Germany]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(scales) | |
library(zoo) | |
library(ggrepel) | |
sf <- 2 | |
width <- 600 * sf | |
height <- 335 * sf | |
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf)) | |
parse_age_groups <- function(df) { | |
df$age_group <- sub("unter 1 Jahr", "0", df$age_group) | |
df$age_group <- sub("85 Jahre und mehr", "85+", df$age_group) | |
df$age_group <- sub("Insgesamt", "all", df$age_group) | |
df$age_group <- sub("-Jährige", "", df$age_group) | |
df$age_group <- sub("100 Jahre und mehr", "100+", df$age_group) | |
df | |
} | |
summarize_age_groups <- function(df, col, target_ag) { | |
generate_age_group <- function(age) { | |
# Return NA if input is NA | |
if (is.na(age)) { | |
return(NA_character_) | |
} | |
# Convert age to character and remove "+" if present | |
age_char <- as.character(age) | |
age_num <- as.numeric(sub("\\+", "", age_char)) | |
# Check if conversion to numeric failed | |
if (is.na(age_num)) { | |
return(NA_character_) | |
} | |
# Iterate through age groups | |
for (range in target_ag) { | |
if (grepl("\\+$", range)) { | |
bound <- as.numeric(sub("\\+", "", range)) | |
if (age_num >= bound) { | |
return(range) | |
} | |
} else if (grepl("-", range)) { | |
bounds <- as.numeric(unlist(strsplit(range, "-"))) | |
if (age_num >= bounds[1] && age_num <= bounds[2]) { | |
return(range) | |
} | |
} else { | |
bound <- as.numeric(range) | |
if (age_num == bound) { | |
return(range) | |
} | |
} | |
} | |
# Return NA if no match found | |
return(NA_character_) | |
} | |
df |> | |
mutate( | |
age_group = case_when( | |
age_group == "all" ~ "all", | |
TRUE ~ sapply(age_group, generate_age_group) | |
) | |
) |> | |
group_by(date, age_group) |> | |
summarize({{ col }} := sum(.data[[col]], na.rm = TRUE), .groups = "drop") |> | |
filter(!is.na(age_group)) | |
} | |
days_in_year <- function(year) { | |
start_date <- ymd(paste0(year, "-01-01")) | |
end_date <- ymd(paste0(year, "-12-31")) | |
as.integer(difftime(end_date, start_date, units = "days")) + 1 | |
} | |
interpolate_daily <- function(data, col, ref_day) { | |
data <- data |> | |
mutate(date = as.Date(paste0(date, "-", ref_day))) |> | |
group_by(age_group) |> | |
complete(date = seq(min(date), max(date), by = "day")) | |
if ("population" %in% names(data)) { | |
data <- mutate(data, across({{ col }}, ~ na.approx(as.numeric(.), | |
rule = 2, na.rm = FALSE | |
))) | |
} | |
if ("deaths" %in% names(data)) { | |
data <- data |> | |
fill(deaths) |> | |
mutate(deaths = deaths / days_in_year(year(date))) | |
} | |
data | |
} | |
ag5 <- c("0-14", "15-64", "65-74", "75-84", "85+") | |
ag10y <- c( | |
"0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+" | |
) | |
# Population | |
pop <- read_delim( | |
"https://apify.mortality.watch/destatis-genesis/12411-0005.csv.gz", | |
delim = ";", skip = 6 | |
) | |
names(pop)[1] <- "age_group" | |
names(pop)[-1] <- sub(".*(.{4})$", "\\1", names(pop)[-1]) | |
pop <- pop |> | |
filter(!is.na(`2000`)) |> | |
parse_age_groups() | |
pop <- pop |> | |
pivot_longer( | |
cols = !c("age_group"), names_to = "date", values_to = "population" | |
) | |
pop$date <- as.integer(pop$date) | |
pop$population <- as.integer(pop$population) | |
pop_1y_daily <- pop |> interpolate_daily("population", ref_day = "12-31") | |
# Deaths | |
deaths <- read_delim( | |
"https://apify.mortality.watch/destatis-genesis/12613-0003.csv.gz", | |
delim = ";", | |
skip = 5, | |
col_types = cols( | |
.default = col_integer(), | |
`...1` = col_character(), | |
`...2` = col_character() | |
) | |
) | |
names(deaths)[1] <- "sex" | |
names(deaths)[2] <- "age_group" | |
deaths <- deaths |> | |
fill(sex, .direction = "down") |> | |
filter(sex == "Insgesamt") |> | |
pivot_longer( | |
cols = !c("sex", "age_group"), names_to = "date", values_to = "deaths" | |
) |> | |
mutate(date = as.integer(date)) |> | |
filter(!is.na(date)) |> | |
parse_age_groups() |> | |
summarize_age_groups("deaths", c(as.character(0:84), "85+")) | |
interpolate_with_future <- function(df) { | |
last <- df |> filter(date == max(date)) | |
last$date <- as.character(as.numeric(last$date) + 1) | |
last$deaths <- NA | |
rbind(df, last) |> | |
interpolate_daily("deaths", "01-01") |> | |
filter(date != as.Date(paste0(unique(last$date), "-01-01"))) | |
} | |
deaths_1y_daily <- deaths |> interpolate_with_future() | |
calc_asmr <- function(pop, deaths) { | |
# Combine Daily Deaths/Population | |
df <- pop |> | |
inner_join(deaths, by = join_by(age_group, date)) |> | |
group_by(date, age_group) |> | |
mutate(cmr = deaths / population * 100000) |> | |
mutate(date = ifelse(is.Date(date), year(date), date)) |> | |
group_by(date, age_group) |> | |
summarize( | |
population = mean(population), cmr = sum(cmr), | |
.groups = "drop" | |
) |> | |
filter(date > 1970) | |
# Std Pop 2020 | |
std_pop <- pop |> | |
filter(date == ifelse(is.Date(date), as.Date("2020-01-01"), 2020)) |> | |
group_by(date, age_group) |> | |
summarize(population = sum(population), .groups = "drop") |> | |
mutate(weight = if_else( | |
age_group != "all", | |
population / population[age_group == "all"], | |
NA_real_ | |
)) |> | |
filter(age_group != "all") |> | |
select(age_group, weight) | |
# ASMR | |
df |> | |
filter(date > 1990) |> | |
inner_join(std_pop, by = join_by(age_group)) |> | |
group_by(date) |> | |
summarize(asmr = sum(cmr * weight)) | |
} | |
asmr <- calc_asmr(pop_1y_daily, deaths_1y_daily) | |
# Plot | |
chart <- asmr |> ggplot(aes(x = date, y = asmr)) + | |
geom_line() + | |
geom_text_repel( | |
aes( | |
label = round(asmr), | |
vjust = ifelse(date %% 2 == 0, -1, 2) | |
), | |
size = 3, | |
) + | |
labs( | |
title = "Age-Standardized Mortality Rate (ASMR) [Germany]", | |
subtitle = paste0( | |
"1y, 0-80+ · Std. Pop.: 2020 · Source: Statistical Office ", | |
"Germany (destatis) · @USMortality" | |
), | |
caption = paste0( | |
"Daily population interpolated from 12/31. Deaths spread daily. ", | |
"CMR weighted by std population, then aggregated for yearly ASMR." | |
), | |
x = "Year", | |
y = "ASMR" | |
) + | |
scale_y_continuous(labels = comma) + | |
theme_bw() | |
ggplot2::ggsave( | |
filename = "chart1.png", plot = chart, width = width, height = height, | |
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo") | |
) | |
# Excess | |
# Parameters | |
n <- 3 # Length of short-term average | |
h <- 4 # Forecast horizon (2017-2020 to 2021-2024) | |
obs_start_year <- 2020 | |
# Step 1: Filter for the 2017-2020 period to determine the baseline level | |
baseline_period <- asmr |> | |
filter(date >= obs_start_year - n & date < obs_start_year) |> | |
summarise(level = mean(asmr, na.rm = TRUE)) |> | |
pull(level) | |
# Step 2: Calculate the variance in percent for each historical | |
# 3-year + 3-year slice | |
variance_percent <- function(data, n, h) { | |
changes <- list() | |
for (i in 1:(nrow(data) - (n + h) + 1)) { | |
slice1 <- data$asmr[i:(i + n - 1)] | |
slice2 <- data$asmr[(i + n):(i + n + h - 1)] | |
percent_change <- (mean(slice2) - mean(slice1)) / mean(slice1) * 100 | |
changes <- append(changes, percent_change) | |
} | |
return(changes) | |
} | |
# Calculate historical variances | |
variances <- variance_percent(asmr |> filter(date < obs_start_year), n, h) | |
# Step 3: Use the variance to construct the expected interval | |
expected_interval <- function(level, variances) { | |
mean_variance <- mean(unlist(variances), na.rm = TRUE) | |
interval_lower <- level * (1 - mean_variance / 100) | |
interval_upper <- level * (1 + mean_variance / 100) | |
return(tibble::tibble( | |
interval_lower = interval_lower, | |
interval_upper = interval_upper | |
)) | |
} | |
# Step 4: Calculate the forecast interval for 2021-2024 based on the | |
# 2017-2020 baseline | |
forecast_result <- expected_interval(baseline_period, variances) | |
asmr_plot <- asmr |> | |
filter(date >= 2000 & date <= 2024) |> | |
mutate(forecast = ifelse(date >= 2020, "Forecast", "Actual")) | |
chart <- ggplot(asmr_plot, aes(x = date, y = asmr)) + | |
geom_line() + | |
geom_text_repel( | |
aes( | |
label = round(asmr), | |
vjust = ifelse(date %% 2 == 0, -1, 2) | |
), | |
size = 3, | |
) + | |
geom_ribbon( | |
data = tibble( | |
date = 2020:2024, | |
interval_lower = forecast_result$interval_lower, | |
interval_upper = forecast_result$interval_upper | |
), | |
aes(x = date, ymin = interval_lower, ymax = interval_upper), | |
inherit.aes = FALSE, | |
alpha = 0.2, fill = "blue" | |
) + | |
labs( | |
title = | |
"Age-Standardized Mortality Rate (ASMR) & Expected Levels [Germany]", | |
subtitle = | |
paste0( | |
"1y, 0-85+ · Std. Pop.: 2020 · Source: Statistical Office ", | |
"(destatis) · @USMortality" | |
), | |
caption = paste0( | |
"Based on a 3y average (2017-19), the forecast interval reflects the ", | |
"3y variance observed over 1970-2019. This provides an expected range\n", | |
"for future values." | |
), | |
y = "ASMR", | |
x = "Year" | |
) + | |
theme_bw() | |
ggplot2::ggsave( | |
filename = "chart2.png", plot = chart, width = width, height = height, | |
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo") | |
) | |
# source("/Users/ben/Downloads/2.r") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(scales) | |
library(zoo) | |
library(ggrepel) | |
sf <- 2 | |
width <- 600 * sf | |
height <- 335 * sf | |
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf)) | |
parse_age_groups <- function(df) { | |
df$age_group <- sub("unter 1 Jahr", "0", df$age_group) | |
df$age_group <- sub("85 Jahre und mehr", "85+", df$age_group) | |
df$age_group <- sub("Insgesamt", "all", df$age_group) | |
df$age_group <- sub("-Jährige", "", df$age_group) | |
df$age_group <- sub("100 Jahre und mehr", "100+", df$age_group) | |
df | |
} | |
summarize_age_groups <- function(df, col, target_ag) { | |
generate_age_group <- function(age) { | |
# Return NA if input is NA | |
if (is.na(age)) { | |
return(NA_character_) | |
} | |
# Convert age to character and remove "+" if present | |
age_char <- as.character(age) | |
age_num <- as.numeric(sub("\\+", "", age_char)) | |
# Check if conversion to numeric failed | |
if (is.na(age_num)) { | |
return(NA_character_) | |
} | |
# Iterate through age groups | |
for (range in target_ag) { | |
if (grepl("\\+$", range)) { | |
bound <- as.numeric(sub("\\+", "", range)) | |
if (age_num >= bound) { | |
return(range) | |
} | |
} else if (grepl("-", range)) { | |
bounds <- as.numeric(unlist(strsplit(range, "-"))) | |
if (age_num >= bounds[1] && age_num <= bounds[2]) { | |
return(range) | |
} | |
} else { | |
bound <- as.numeric(range) | |
if (age_num == bound) { | |
return(range) | |
} | |
} | |
} | |
# Return NA if no match found | |
return(NA_character_) | |
} | |
df |> | |
mutate( | |
age_group = case_when( | |
age_group == "all" ~ "all", | |
TRUE ~ sapply(age_group, generate_age_group) | |
) | |
) |> | |
group_by(date, age_group) |> | |
summarize({{ col }} := sum(.data[[col]], na.rm = TRUE), .groups = "drop") |> | |
filter(!is.na(age_group)) | |
} | |
days_in_year <- function(year) { | |
start_date <- ymd(paste0(year, "-01-01")) | |
end_date <- ymd(paste0(year, "-12-31")) | |
as.integer(difftime(end_date, start_date, units = "days")) + 1 | |
} | |
interpolate_daily <- function(data, col, ref_day) { | |
data <- data |> | |
mutate(date = as.Date(paste0(date, "-", ref_day))) |> | |
group_by(age_group) |> | |
complete(date = seq(min(date), max(date), by = "day")) | |
if ("population" %in% names(data)) { | |
data <- mutate(data, across({{ col }}, ~ na.approx(as.numeric(.), | |
rule = 2, na.rm = FALSE | |
))) | |
} | |
if ("deaths" %in% names(data)) { | |
data <- data |> | |
fill(deaths) |> | |
mutate(deaths = deaths / days_in_year(year(date))) | |
} | |
data | |
} | |
ag5 <- c("0-14", "15-64", "65-74", "75-84", "85+") | |
ag10y <- c( | |
"0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+" | |
) | |
# Population | |
pop <- read_delim( | |
"https://apify.mortality.watch/destatis-genesis/12411-0005.csv.gz", | |
delim = ";", skip = 6 | |
) | |
names(pop)[1] <- "age_group" | |
names(pop)[-1] <- sub(".*(.{4})$", "\\1", names(pop)[-1]) | |
pop <- pop |> | |
filter(!is.na(`2000`)) |> | |
parse_age_groups() | |
pop <- pop |> | |
pivot_longer( | |
cols = !c("age_group"), names_to = "date", values_to = "population" | |
) | |
pop$date <- as.integer(pop$date) | |
pop$population <- as.integer(pop$population) | |
pop_5 <- pop |> summarize_age_groups("population", ag5) | |
pop_10y <- pop |> summarize_age_groups("population", ag10y) | |
pop_10y_daily <- pop_10y |> interpolate_daily("population", ref_day = "12-31") | |
pop_1y_daily <- pop |> interpolate_daily("population", ref_day = "12-31") | |
# Deaths | |
deaths <- read_delim( | |
"https://apify.mortality.watch/destatis-genesis/12613-0003.csv.gz", | |
delim = ";", | |
skip = 5, | |
col_types = cols( | |
.default = col_integer(), | |
`...1` = col_character(), | |
`...2` = col_character() | |
) | |
) | |
names(deaths)[1] <- "sex" | |
names(deaths)[2] <- "age_group" | |
deaths <- deaths |> | |
fill(sex, .direction = "down") |> | |
filter(sex == "Insgesamt") |> | |
pivot_longer( | |
cols = !c("sex", "age_group"), names_to = "date", values_to = "deaths" | |
) |> | |
mutate(date = as.integer(date)) |> | |
filter(!is.na(date)) |> | |
parse_age_groups() |> | |
summarize_age_groups("deaths", c(as.character(0:84), "85+")) | |
interpolate_with_future <- function(df) { | |
last <- df |> filter(date == max(date)) | |
last$date <- as.character(as.numeric(last$date) + 1) | |
last$deaths <- NA | |
rbind(df, last) |> | |
interpolate_daily("deaths", "01-01") |> | |
filter(date != as.Date(paste0(unique(last$date), "-01-01"))) | |
} | |
deaths_5 <- deaths |> summarize_age_groups("deaths", ag5) | |
deaths_10y <- deaths |> summarize_age_groups("deaths", ag10y) | |
deaths_10y_daily <- deaths_10y |> interpolate_with_future() | |
deaths_1y_daily <- deaths |> interpolate_with_future() | |
calc_asmr <- function(pop, deaths) { | |
# Combine Daily Deaths/Population | |
df <- pop |> | |
inner_join(deaths, by = join_by(age_group, date)) |> | |
group_by(date, age_group) |> | |
mutate(cmr = deaths / population * 100000) |> | |
mutate(date = ifelse(is.Date(date), year(date), date)) |> | |
group_by(date, age_group) |> | |
summarize( | |
population = mean(population), cmr = sum(cmr), | |
.groups = "drop" | |
) |> | |
filter(date > 1970) | |
# Std Pop 2020 | |
std_pop <- pop |> | |
filter(date == ifelse(is.Date(date), as.Date("2020-01-01"), 2020)) |> | |
group_by(date, age_group) |> | |
summarize(population = sum(population), .groups = "drop") |> | |
mutate(weight = if_else( | |
age_group != "all", | |
population / population[age_group == "all"], | |
NA_real_ | |
)) |> | |
filter(age_group != "all") |> | |
select(age_group, weight) | |
# ASMR | |
df |> | |
filter(date > 1990) |> | |
inner_join(std_pop, by = join_by(age_group)) |> | |
group_by(date) |> | |
summarize(asmr = sum(cmr * weight)) | |
} | |
# 1) Simple year joins, by Mortality.org age groups | |
# (0-14, 15-64, 65-74, 75-84, 85+) | |
df1 <- calc_asmr(pop_5, deaths_5) | |
# 2) Simple year joins, by 10y age groups (0-10, .., 80+) | |
df2 <- calc_asmr(pop_10y, deaths_10y) | |
# 3) Daily year joins, by 10y age groups (0-10, .., 80+) | |
df3 <- calc_asmr(pop_10y_daily, deaths_10y_daily) | |
# 4) Simple year joins, by single age groups (0-85) | |
df4 <- calc_asmr(pop, deaths) | |
# 5) Daily year joins, by single age groups | |
df5 <- calc_asmr(pop_1y_daily, deaths_1y_daily) | |
asmr <- df1 |> | |
left_join(df2, by = "date") |> | |
left_join(df3, by = "date") |> | |
left_join(df4, by = "date") |> | |
left_join(df5, by = "date") |> | |
setNames(c("date", "ag5", "10y", "10y_daily", "1y", "1y_daily")) |> | |
pivot_longer(cols = 2:6, names_to = "type", values_to = "asmr") | |
df_plot <- asmr |> filter(date >= 2010) | |
# Plot | |
chart <- | |
df_plot |> | |
ggplot(aes(x = date, y = asmr, color = type, linetype = type)) + | |
geom_line() + | |
scale_color_manual(values = c( | |
"ag5" = "blue", | |
"1y_daily" = "black", | |
"1y" = "black", | |
"10y" = "red", | |
"10y_daily" = "red" | |
)) + | |
scale_linetype_manual(values = c( | |
"ag5" = "solid", | |
"1y_daily" = "solid", | |
"1y" = "dashed", | |
"10y" = "dashed", | |
"10y_daily" = "solid" | |
)) + | |
geom_text_repel( | |
data = subset(df_plot, type == "10y_daily"), | |
aes( | |
label = round(asmr), | |
vjust = ifelse(date %% 2 == 0, -1, 2) | |
), | |
size = 3 | |
) + | |
labs( | |
title = paste0( | |
"Age-Standardized Mortality Rate (ASMR) by Calculation ", | |
"Strategy [Germany]" | |
), | |
subtitle = paste0( | |
"Std. Pop.: 2020 · Source: Statistical Office ", | |
"Germany (destatis) · @USMortality" | |
), | |
x = "Year", | |
y = "ASMR" | |
) + | |
scale_y_continuous(labels = comma) + | |
theme_bw() | |
ggplot2::ggsave( | |
filename = "chart3.png", plot = chart, width = width, height = height, | |
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo") | |
) | |
# source("/Users/ben/Downloads/1.r") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://www.mortality.watch/charts/list.html#age-standardized-mortality-rate-asmr-germany