USMortality · February 13, 2025 03:31
diff --git a/chart_germany_asmr_asd.r b/chart_germany_asmr_asd.r
 library(readr)
 library(tidyr)
 library(ggplot2)
 library(dplyr)
 library(stringr)
 library(scales)
 library(lubridate)
 library(tsibble)

 source(paste0(
  "https://gist.githubusercontent.com/USMortality/",
  "cbb314e8e29b2f5b5578483504dd0d9f/raw/standard_population.r"
 ))

 sf <- 2
 width <- 1080 * sf
 height <- 1920 / 4 * sf

 options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))

 # Settings
 per_factor <- 100000
 bl_start_year <- 2017
 bl_end_year <- 2019
 obs_start_year <- 2020
 obs_end_year <- 2024

 # Deaths
 deaths <- read_csv("https://s3.mortality.watch/data/deaths/deu/deaths.csv") |>
  filter(jurisdiction == "Deutschland")

 # Population
 parse_age_groups <- function(df) {
  df$age_group <- sub("unter 1 Jahr", "0", df$age_group)
  df$age_group <- sub("unter 15 Jahre", "0-14", df$age_group)
  df$age_group <- sub("50 Jahre und mehr", "50+", df$age_group)
  df$age_group <- sub("85 Jahre und mehr", "85+", df$age_group)
  df$age_group <- sub("Insgesamt", "all", df$age_group)
  df$age_group <- sub("Alter unbekannt", NA, df$age_group)
  df$age_group <- sub("-Jährige", "", df$age_group)
  df$age_group <- sub("100 Jahre und mehr", "100+", df$age_group)
  df
 }

 # Population
 pop <- read_delim(
  "https://apify.mortality.watch/destatis-genesis/12411-0005.csv.gz",
  delim = ";", skip = 6
 )

 # Cleanup
 names(pop)[1] <- "age_group"
 names(pop)[-1] <- sub(".*(.{4})$", "\\1", names(pop)[-1])
 pop <- pop |>
  filter(!is.na(`2000`)) |>
  parse_age_groups()
 pop <- pop |>
  pivot_longer(
    cols = !c("age_group"), names_to = "year", values_to = "population"
  )
 pop$year <- as.integer(pop$year)
 pop$population <- as.integer(pop$population)


 # Summarize deaths data and align age groups using case_when
 # Align and summarize deaths data, consolidating 85+ groups
 df_deaths <- deaths %>%
  mutate(age_group = case_when(
    age_group %in% c("85-89", "90-94", "95+", "Insgesamt") ~ "85+",
    TRUE ~ age_group # Keep other age groups as they are
  )) %>%
  group_by(year, week, age_group) %>%
  summarize(deaths = sum(deaths), .groups = "drop")

 df_deaths_all <- df_deaths |>
  group_by(year, week) |>
  summarize(deaths = sum(deaths))
 df_deaths_all$age_group <- "all"

 df_deaths <- rbind(df_deaths, df_deaths_all)

 # Summarize population data (no need to modify 85+ since it already exists)
 df_pop <- pop %>%
  mutate(age_group = case_when(
    as.integer(age_group) < 30 ~ "0-29",
    as.integer(age_group) >= 30 & as.integer(age_group) < 35 ~ "30-34",
    as.integer(age_group) >= 35 & as.integer(age_group) < 40 ~ "35-39",
    as.integer(age_group) >= 40 & as.integer(age_group) < 45 ~ "40-44",
    as.integer(age_group) >= 45 & as.integer(age_group) < 50 ~ "45-49",
    as.integer(age_group) >= 50 & as.integer(age_group) < 55 ~ "50-54",
    as.integer(age_group) >= 55 & as.integer(age_group) < 60 ~ "55-59",
    as.integer(age_group) >= 60 & as.integer(age_group) < 65 ~ "60-64",
    as.integer(age_group) >= 65 & as.integer(age_group) < 70 ~ "65-69",
    as.integer(age_group) >= 70 & as.integer(age_group) < 75 ~ "70-74",
    as.integer(age_group) >= 75 & as.integer(age_group) < 80 ~ "75-79",
    as.integer(age_group) >= 80 & as.integer(age_group) < 85 ~ "80-84",
    TRUE ~ age_group # Keep 85+ and other existing groups
  )) %>%
  group_by(year, age_group) %>%
  summarize(population = sum(population), .groups = "drop")

 # Check if the age groups match between the two datasets
 stopifnot(identical(
  sort(unique(df_deaths$age_group)),
  sort(unique(df_pop$age_group))
 ))

 df <- df_deaths |>
  inner_join(df_pop, by = join_by("year", "age_group")) |>
  mutate(
    date = make_yearweek(year, week),
    is_senior = case_when(
      age_group %in% c("65-69", "70-74", "75-79", "80-84", "85+") ~ TRUE,
      .default = FALSE
    )
  ) |>
  filter(year %in% bl_start_year:obs_end_year, week <= 52)


 # 1) Crude Mortality Rate (CMR)
 df_cmr <- df |>
  filter(age_group != "NS") |>
  mutate(age_group = case_when(
    !is_senior & age_group != "all" ~ "0-64",
    is_senior & age_group != "all" ~ "65+",
    .default = age_group
  )) |>
  group_by(date, age_group) |>
  summarize(
    deaths = sum(deaths),
    population = sum(population),
    .groups = "drop"
  )

 cmr <- df_cmr |>
  mutate(cmr = deaths / population * per_factor) |>
  group_by(age_group, week(date)) |>
  mutate(cmr_bl = mean(cmr[year(date) %in% bl_start_year:bl_end_year])) |>
  ungroup() |>
  select(-`week(date)`)
 stopifnot(nrow(cmr |> filter(is.na(cmr))) == 0)
 stopifnot(nrow(cmr |> filter(is.na(cmr_bl))) == 0)

 # 2) Age-Standardized Mortality Rate (ASMR)
 calculate_asmr_for_group <- function(df_group, age_group_label) {
  std_pop <- get_std_pop_weights(unique(df_group$age_group), usa2000)
  std_pop$weight <- std_pop$weight / sum(std_pop$weight) # Scale to 1
  df_group |>
    inner_join(std_pop, by = join_by(age_group)) |>
    mutate(cmr = deaths / population * 100000) |>
    group_by(year, week) |>
    summarize(asmr = sum(cmr * weight), .groups = "drop") |>
    mutate(date = make_yearweek(year, week)) |>
    group_by(week) |>
    mutate(
      asmr_bl = mean(asmr[year %in% bl_start_year:bl_end_year]),
      age_group = age_group_label
    ) |>
    ungroup()
 }

 # Calculate ASMR for all, 0-64, and 65+ age groups
 asmr_all <- calculate_asmr_for_group(
  df |> filter(!age_group %in% c("NS", "all")), "all"
 )
 asmr_jr <- calculate_asmr_for_group(
  df |> filter(!is_senior, !age_group %in% c("NS", "all")), "0-64"
 )
 asmr_sr <- calculate_asmr_for_group(
  df |> filter(is_senior, !age_group %in% c("NS", "all")), "65+"
 )

 asmr <- bind_rows(asmr_all, asmr_jr, asmr_sr) |> arrange(date)
 stopifnot(nrow(asmr |> filter(is.na(asmr))) == 0)
 stopifnot(nrow(asmr |> filter(is.na(asmr_bl))) == 0)


 # 3) Calculate Ages-Standardized Deaths (ASD)
 calculate_asd_for_group <- function(df2, age_group_label) {
  df2 |>
    group_by(age_group, week) |>
    summarize(
      asd_bl = mean(
        if_else(
          year <= bl_end_year & population > 0,
          deaths / population * per_factor,
          NA
        ),
        na.rm = TRUE
      ),
      .groups = "drop"
    ) |>
    inner_join(
      df |> filter(year(date) >= bl_start_year, year(date) <= obs_end_year),
      by = c("age_group", "week")
    ) |>
    mutate(asd_bl = asd_bl * population / per_factor) |>
    group_by(date) |>
    summarize(
      asd = sum(deaths, na.rm = TRUE),
      asd_bl = sum(asd_bl, na.rm = TRUE),
      .groups = "drop"
    ) |>
    mutate(age_group = age_group_label)
 }

 # Calculate ASD for all, 0-64, and 65+ age groups
 asd_all <- calculate_asd_for_group(
  df |> filter(!age_group %in% c("NS", "all")), "all"
 )
 asd_jr <- calculate_asd_for_group(
  df |> filter(!is_senior, !age_group %in% c("NS", "all")), "0-64"
 )
 asd_sr <- calculate_asd_for_group(
  df |> filter(is_senior, !age_group %in% c("NS", "all")), "65+"
 )

 asd <- bind_rows(asd_all, asd_jr, asd_sr) |> arrange(date)


 # Calculate Pandemic Totals
 result <- cmr |>
  inner_join(asmr, join_by(date, age_group)) |>
  inner_join(asd, join_by(date, age_group))
 result$year <- as.character(result$year)

 pandemic <- result |>
  filter(year %in% obs_start_year:obs_end_year) |>
  group_by(age_group) |>
  summarise(across(
    c(deaths, cmr, cmr_bl, asmr, asmr_bl, asd, asd_bl),
    sum
  ), .groups = "drop") |>
  mutate(year = "2020-2023", population = NA)
 result <- bind_rows(result, pandemic)

 # Calculate Excess
 result <- result |>
  mutate(
    cmrx = cmr - cmr_bl, cmrx_p = cmrx / cmr_bl,
    asmrx = asmr - asmr_bl, asmrx_p = asmrx / asmr_bl,
    asdx = asd - asd_bl, asdx_p = asdx / asd_bl
  ) |>
  relocate(cmrx, cmrx_p, .after = cmr_bl) |>
  relocate(asmrx, asmrx_p, .after = asmr_bl) |>
  relocate(asdx, asdx_p, .after = asd_bl)

 stopifnot(nrow(result |> filter(deaths > 0, is.na(cmrx_p))) == 0)
 stopifnot(nrow(result |> filter(deaths > 0, is.na(asmrx_p))) == 0)
 stopifnot(nrow(result |> filter(deaths > 0, is.na(asdx_p))) == 0)

 chart <- result |>
  filter(!is.na(date)) |>
  ggplot(aes(x = date)) +
  geom_line(aes(y = cmrx_p, color = "CMR"), linewidth = .4) +
  geom_line(aes(y = asmrx_p, color = "ASMR"), linewidth = .4) +
  geom_line(aes(y = asdx_p, color = "ASD"), linewidth = .4) +
  labs(
    title = "Excess CMR vs Excess ASMR vs. Excess ASD (%) [Germany]",
    subtitle = "Mean 2017-'19 Baseline",
    y = "Excess Mortality (%)",
    x = "Month of Year",
    color = "Mortality Type"
  ) +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() +
  facet_wrap(vars(age_group))

 ggplot2::ggsave(
  filename = "chart1.png", plot = chart, width = width, height = height,
  units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
 )

 # Correlation
 result %>%
  group_by(age_group) %>%
  summarise(
    cor_cmr_asmr = cor(cmrx_p, asmrx_p, use = "complete.obs"),
    cor_cmr_asxd = cor(cmrx_p, asdx_p, use = "complete.obs"),
    cor_asmr_asxd = cor(asmrx_p, asdx_p, use = "complete.obs")
  )
	library(readr)
	library(tidyr)
	library(ggplot2)
	library(dplyr)
	library(stringr)
	library(scales)
	library(lubridate)
	library(tsibble)

	source(paste0(
	"https://gist.githubusercontent.com/USMortality/",
	"cbb314e8e29b2f5b5578483504dd0d9f/raw/standard_population.r"
	))

	sf <- 2
	width <- 1080 * sf
	height <- 1920 / 4 * sf

	options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))

	# Settings
	per_factor <- 100000
	bl_start_year <- 2017
	bl_end_year <- 2019
	obs_start_year <- 2020
	obs_end_year <- 2024

	# Deaths
	deaths <- read_csv("https://s3.mortality.watch/data/deaths/deu/deaths.csv") \|>
	filter(jurisdiction == "Deutschland")

	# Population
	parse_age_groups <- function(df) {
	df$age_group <- sub("unter 1 Jahr", "0", df$age_group)
	df$age_group <- sub("unter 15 Jahre", "0-14", df$age_group)
	df$age_group <- sub("50 Jahre und mehr", "50+", df$age_group)
	df$age_group <- sub("85 Jahre und mehr", "85+", df$age_group)
	df$age_group <- sub("Insgesamt", "all", df$age_group)
	df$age_group <- sub("Alter unbekannt", NA, df$age_group)
	df$age_group <- sub("-Jährige", "", df$age_group)
	df$age_group <- sub("100 Jahre und mehr", "100+", df$age_group)
	df
	}

	# Population
	pop <- read_delim(
	"https://apify.mortality.watch/destatis-genesis/12411-0005.csv.gz",
	delim = ";", skip = 6
	)

	# Cleanup
	names(pop)[1] <- "age_group"
	names(pop)[-1] <- sub(".*(.{4})$", "\\1", names(pop)[-1])
	pop <- pop \|>
	filter(!is.na(`2000`)) \|>
	parse_age_groups()
	pop <- pop \|>
	pivot_longer(
	cols = !c("age_group"), names_to = "year", values_to = "population"
	)
	pop$year <- as.integer(pop$year)
	pop$population <- as.integer(pop$population)


	# Summarize deaths data and align age groups using case_when
	# Align and summarize deaths data, consolidating 85+ groups
	df_deaths <- deaths %>%
	mutate(age_group = case_when(
	age_group %in% c("85-89", "90-94", "95+", "Insgesamt") ~ "85+",
	TRUE ~ age_group # Keep other age groups as they are
	)) %>%
	group_by(year, week, age_group) %>%
	summarize(deaths = sum(deaths), .groups = "drop")

	df_deaths_all <- df_deaths \|>
	group_by(year, week) \|>
	summarize(deaths = sum(deaths))
	df_deaths_all$age_group <- "all"

	df_deaths <- rbind(df_deaths, df_deaths_all)

	# Summarize population data (no need to modify 85+ since it already exists)
	df_pop <- pop %>%
	mutate(age_group = case_when(
	as.integer(age_group) < 30 ~ "0-29",
	as.integer(age_group) >= 30 & as.integer(age_group) < 35 ~ "30-34",
	as.integer(age_group) >= 35 & as.integer(age_group) < 40 ~ "35-39",
	as.integer(age_group) >= 40 & as.integer(age_group) < 45 ~ "40-44",
	as.integer(age_group) >= 45 & as.integer(age_group) < 50 ~ "45-49",
	as.integer(age_group) >= 50 & as.integer(age_group) < 55 ~ "50-54",
	as.integer(age_group) >= 55 & as.integer(age_group) < 60 ~ "55-59",
	as.integer(age_group) >= 60 & as.integer(age_group) < 65 ~ "60-64",
	as.integer(age_group) >= 65 & as.integer(age_group) < 70 ~ "65-69",
	as.integer(age_group) >= 70 & as.integer(age_group) < 75 ~ "70-74",
	as.integer(age_group) >= 75 & as.integer(age_group) < 80 ~ "75-79",
	as.integer(age_group) >= 80 & as.integer(age_group) < 85 ~ "80-84",
	TRUE ~ age_group # Keep 85+ and other existing groups
	)) %>%
	group_by(year, age_group) %>%
	summarize(population = sum(population), .groups = "drop")

	# Check if the age groups match between the two datasets
	stopifnot(identical(
	sort(unique(df_deaths$age_group)),
	sort(unique(df_pop$age_group))
	))

	df <- df_deaths \|>
	inner_join(df_pop, by = join_by("year", "age_group")) \|>
	mutate(
	date = make_yearweek(year, week),
	is_senior = case_when(
	age_group %in% c("65-69", "70-74", "75-79", "80-84", "85+") ~ TRUE,
	.default = FALSE
	)
	) \|>
	filter(year %in% bl_start_year:obs_end_year, week <= 52)


	# 1) Crude Mortality Rate (CMR)
	df_cmr <- df \|>
	filter(age_group != "NS") \|>
	mutate(age_group = case_when(
	!is_senior & age_group != "all" ~ "0-64",
	is_senior & age_group != "all" ~ "65+",
	.default = age_group
	)) \|>
	group_by(date, age_group) \|>
	summarize(
	deaths = sum(deaths),
	population = sum(population),
	.groups = "drop"
	)

	cmr <- df_cmr \|>
	mutate(cmr = deaths / population * per_factor) \|>
	group_by(age_group, week(date)) \|>
	mutate(cmr_bl = mean(cmr[year(date) %in% bl_start_year:bl_end_year])) \|>
	ungroup() \|>
	select(-`week(date)`)
	stopifnot(nrow(cmr \|> filter(is.na(cmr))) == 0)
	stopifnot(nrow(cmr \|> filter(is.na(cmr_bl))) == 0)

	# 2) Age-Standardized Mortality Rate (ASMR)
	calculate_asmr_for_group <- function(df_group, age_group_label) {
	std_pop <- get_std_pop_weights(unique(df_group$age_group), usa2000)
	std_pop$weight <- std_pop$weight / sum(std_pop$weight) # Scale to 1
	df_group \|>
	inner_join(std_pop, by = join_by(age_group)) \|>
	mutate(cmr = deaths / population * 100000) \|>
	group_by(year, week) \|>
	summarize(asmr = sum(cmr * weight), .groups = "drop") \|>
	mutate(date = make_yearweek(year, week)) \|>
	group_by(week) \|>
	mutate(
	asmr_bl = mean(asmr[year %in% bl_start_year:bl_end_year]),
	age_group = age_group_label
	) \|>
	ungroup()
	}

	# Calculate ASMR for all, 0-64, and 65+ age groups
	asmr_all <- calculate_asmr_for_group(
	df \|> filter(!age_group %in% c("NS", "all")), "all"
	)
	asmr_jr <- calculate_asmr_for_group(
	df \|> filter(!is_senior, !age_group %in% c("NS", "all")), "0-64"
	)
	asmr_sr <- calculate_asmr_for_group(
	df \|> filter(is_senior, !age_group %in% c("NS", "all")), "65+"
	)

	asmr <- bind_rows(asmr_all, asmr_jr, asmr_sr) \|> arrange(date)
	stopifnot(nrow(asmr \|> filter(is.na(asmr))) == 0)
	stopifnot(nrow(asmr \|> filter(is.na(asmr_bl))) == 0)


	# 3) Calculate Ages-Standardized Deaths (ASD)
	calculate_asd_for_group <- function(df2, age_group_label) {
	df2 \|>
	group_by(age_group, week) \|>
	summarize(
	asd_bl = mean(
	if_else(
	year <= bl_end_year & population > 0,
	deaths / population * per_factor,
	NA
	),
	na.rm = TRUE
	),
	.groups = "drop"
	) \|>
	inner_join(
	df \|> filter(year(date) >= bl_start_year, year(date) <= obs_end_year),
	by = c("age_group", "week")
	) \|>
	mutate(asd_bl = asd_bl * population / per_factor) \|>
	group_by(date) \|>
	summarize(
	asd = sum(deaths, na.rm = TRUE),
	asd_bl = sum(asd_bl, na.rm = TRUE),
	.groups = "drop"
	) \|>
	mutate(age_group = age_group_label)
	}

	# Calculate ASD for all, 0-64, and 65+ age groups
	asd_all <- calculate_asd_for_group(
	df \|> filter(!age_group %in% c("NS", "all")), "all"
	)
	asd_jr <- calculate_asd_for_group(
	df \|> filter(!is_senior, !age_group %in% c("NS", "all")), "0-64"
	)
	asd_sr <- calculate_asd_for_group(
	df \|> filter(is_senior, !age_group %in% c("NS", "all")), "65+"
	)

	asd <- bind_rows(asd_all, asd_jr, asd_sr) \|> arrange(date)


	# Calculate Pandemic Totals
	result <- cmr \|>
	inner_join(asmr, join_by(date, age_group)) \|>
	inner_join(asd, join_by(date, age_group))
	result$year <- as.character(result$year)

	pandemic <- result \|>
	filter(year %in% obs_start_year:obs_end_year) \|>
	group_by(age_group) \|>
	summarise(across(
	c(deaths, cmr, cmr_bl, asmr, asmr_bl, asd, asd_bl),
	sum
	), .groups = "drop") \|>
	mutate(year = "2020-2023", population = NA)
	result <- bind_rows(result, pandemic)

	# Calculate Excess
	result <- result \|>
	mutate(
	cmrx = cmr - cmr_bl, cmrx_p = cmrx / cmr_bl,
	asmrx = asmr - asmr_bl, asmrx_p = asmrx / asmr_bl,
	asdx = asd - asd_bl, asdx_p = asdx / asd_bl
	) \|>
	relocate(cmrx, cmrx_p, .after = cmr_bl) \|>
	relocate(asmrx, asmrx_p, .after = asmr_bl) \|>
	relocate(asdx, asdx_p, .after = asd_bl)

	stopifnot(nrow(result \|> filter(deaths > 0, is.na(cmrx_p))) == 0)
	stopifnot(nrow(result \|> filter(deaths > 0, is.na(asmrx_p))) == 0)
	stopifnot(nrow(result \|> filter(deaths > 0, is.na(asdx_p))) == 0)

	chart <- result \|>
	filter(!is.na(date)) \|>
	ggplot(aes(x = date)) +
	geom_line(aes(y = cmrx_p, color = "CMR"), linewidth = .4) +
	geom_line(aes(y = asmrx_p, color = "ASMR"), linewidth = .4) +
	geom_line(aes(y = asdx_p, color = "ASD"), linewidth = .4) +
	labs(
	title = "Excess CMR vs Excess ASMR vs. Excess ASD (%) [Germany]",
	subtitle = "Mean 2017-'19 Baseline",
	y = "Excess Mortality (%)",
	x = "Month of Year",
	color = "Mortality Type"
	) +
	scale_y_continuous(labels = scales::percent) +
	theme_bw() +
	facet_wrap(vars(age_group))

	ggplot2::ggsave(
	filename = "chart1.png", plot = chart, width = width, height = height,
	units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
	)

	# Correlation
	result %>%
	group_by(age_group) %>%
	summarise(
	cor_cmr_asmr = cor(cmrx_p, asmrx_p, use = "complete.obs"),
	cor_cmr_asxd = cor(cmrx_p, asdx_p, use = "complete.obs"),
	cor_asmr_asxd = cor(asmrx_p, asdx_p, use = "complete.obs")
	)