Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active February 13, 2025 03:37
Show Gist options
  • Save USMortality/abc83ca883b8f624c7d279b2248e37f1 to your computer and use it in GitHub Desktop.
Save USMortality/abc83ca883b8f624c7d279b2248e37f1 to your computer and use it in GitHub Desktop.
Yearly Death Rates [Germany]
library(readr)
library(tidyr)
library(ggplot2)
library(dplyr)
sf <- 2
width <- 600 * sf
height <- 335 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
parse_age_groups <- function(df) {
df$age_group <- sub("unter 1 Jahr", "0", df$age_group)
df$age_group <- sub("unter 15 Jahre", "0-14", df$age_group)
df$age_group <- sub("50 Jahre und mehr", "50+", df$age_group)
df$age_group <- sub("85 Jahre und mehr", "85+", df$age_group)
df$age_group <- sub("Insgesamt", "all", df$age_group)
df$age_group <- sub("Alter unbekannt", NA, df$age_group)
df$age_group <- sub("-Jährige", "", df$age_group)
df$age_group <- sub("100 Jahre und mehr", "100+", df$age_group)
df
}
# Population
pop <- read_delim(
"https://apify.mortality.watch/destatis-genesis/12411-0005.csv.gz",
delim = ";", skip = 6,
)
# Cleanup
names(pop)[1] <- "age_group"
names(pop)[-1] <- sub(".*(.{4})$", "\\1", names(pop)[-1])
pop <- pop |>
# filter(!is.na(`2000`)) |>
parse_age_groups()
pop <- pop |>
pivot_longer(
cols = !c("age_group"), names_to = "year", values_to = "population"
)
pop$year <- as.integer(pop$year)
pop$population <- as.integer(pop$population)
# Deaths
deaths <- read_delim(
"https://apify.mortality.watch/destatis-genesis/12613-0003.csv.gz",
delim = ";",
skip = 5,
col_types = cols(
.default = col_integer(),
`...1` = col_character(),
`...2` = col_character()
)
)
names(deaths)[1] <- "sex"
names(deaths)[2] <- "age_group"
deaths <- deaths |>
fill(sex) |>
filter(sex == "Insgesamt") |>
pivot_longer(
cols = !c("sex", "age_group"), names_to = "year", values_to = "deaths"
) |>
parse_age_groups() |>
filter(!is.na(year)) |>
select(2, 3, 4)
deaths$year <- as.numeric(deaths$year)
# Calculate Death Risk for 2010-2019
deaths_ag <- deaths |>
filter(year >= 2010) |>
mutate(
age_group = case_when(
age_group %in% c(0:49) ~ "0-49",
age_group %in% c(50:54) ~ "50-54",
age_group %in% c(55:59) ~ "55-59",
age_group %in% c(60:64) ~ "60-64",
age_group %in% c(65:69) ~ "65-69",
age_group %in% c(70:74) ~ "70-74",
age_group %in% c(75:79) ~ "75-79",
age_group %in% c(80:99) ~ "80+",
age_group == "100+" ~ "80+"
)
) |>
group_by(year, age_group) |>
summarise(deaths = sum(.data$deaths), .groups = "drop")
pop_ag <- pop |>
filter(year >= 2010) |>
mutate(
age_group = case_when(
age_group %in% c(0:49) ~ "0-49",
age_group %in% c(50:54) ~ "50-54",
age_group %in% c(55:59) ~ "55-59",
age_group %in% c(60:64) ~ "60-64",
age_group %in% c(65:69) ~ "65-69",
age_group %in% c(70:74) ~ "70-74",
age_group %in% c(75:79) ~ "75-79",
age_group %in% c(80:84) ~ "80+",
age_group == "85+" ~ "80+"
)
) |>
group_by(year, age_group) |>
summarise(population = sum(.data$population), .groups = "drop")
df <- deaths_ag |>
filter(!is.na(age_group)) |>
inner_join(pop_ag, by = join_by(age_group, year)) |>
mutate(risk = deaths / population)
# Plot risk
df$color <- ifelse(df$year >= 2010 & df$year <= 2019, "2010-'19", "2020-'22")
chart <- ggplot(df, aes(x = age_group, y = risk, group = year, color = color)) +
geom_line() +
geom_point() +
# scale_y_log10(labels = scales::percent_format()) +
scale_y_continuous(labels = scales::percent_format()) +
labs(
x = "Age Group", y = "Risk",
title = "Risk of Death by Age Group and Year [Germany]",
subtitle = "Source: Destatis/Genesis: 12411-0005, 12613-0003",
color = "Year"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_color_manual(
values = c("2010-'19" = "black", "2020-'22" = "red"),
labels = c("2010-'19" = "2010-'19", "2020-'22" = "2020-'22")
)
ggplot2::ggsave(
filename = "chart1.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment