Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active March 13, 2025 18:16
Show Gist options
  • Save USMortality/3ec2e77d8f5abc8584df68e889002559 to your computer and use it in GitHub Desktop.
Save USMortality/3ec2e77d8f5abc8584df68e889002559 to your computer and use it in GitHub Desktop.
Live Births [Germany]
library(readr)
library(dplyr)
library(tsibble)
library(TTR)
library(ggplot2)
library(tidyr)
library(fable)
sf <- 2
width <- 600 * sf
height <- 335 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
parse_age_groups <- function(pop) {
pop$age_group <- iconv(pop$age_group)
pop$age_group <- sub("unter 1 Jahr", "0", pop$age_group)
pop$age_group <- sub("unter 15 Jahre", "0-14", pop$age_group)
pop$age_group <- sub("50 Jahre und mehr", "50+", pop$age_group)
pop$age_group <- sub("85 Jahre und mehr", "85+", pop$age_group)
pop$age_group <- sub("Insgesamt", "all", pop$age_group)
pop$age_group <- sub("Alter unbekannt", NA, pop$age_group)
pop$age_group <- sub("-Jährige", "", pop$age_group)
pop
}
# 1. Births
# The absolute number of live births occurring in a population within a
# specified period, usually a year.
# https://www-genesis.destatis.de/genesis/online?operation=table&code=12612-0002
births <- read_delim(
"https://apify.mortality.watch/destatis-genesis/12612-0002.csv.gz",
delim = ";",
col_names = c("year", "month", "m", "f", "total"),
col_types = "iciii",
skip = 6
) |> fill(year)
births$month_int <- match(births$month, c(
"Januar", "Februar", "März", "April", "Mai", "Juni",
"Juli", "August", "September", "Oktober", "November", "Dezember"
))
ts <- births |>
filter(!is.na(month_int) & total != "...") |>
mutate(
date = make_yearmonth(year, month_int),
total = as.numeric(total),
moving_avg = SMA(total, n = 12)
) |>
select(date, total, moving_avg) |>
as_tsibble(index = date)
chart <- ggplot(ts, aes(x = date)) +
geom_line(aes(y = total), color = "black") +
geom_line(aes(y = moving_avg), color = "blue") +
labs(
title = "Total and 12-month Moving Average of Live Births [Germany]",
subtitle = "Source: Destatis/Genesis: 12612-0002",
caption =
"The absolute number of live births occurring in within a year.",
y = "Live Births", x = "Month"
)
ggplot2::ggsave(
filename = "chart1.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# 2. Crude Birth Rate (CBR):
# This rate expresses the number of live births per 1,000 people in the total
# population per year.
pop <- read_delim(
"https://apify.mortality.watch/destatis-genesis/12411-0005.csv.gz",
delim = ";", skip = 6
)
# Cleanup
names(pop)[1] <- "age_group"
names(pop)[-1] <- sub(".*(.{4})$", "\\1", names(pop)[-1])
pop <- pop |>
filter(!is.na(`2000`)) |>
parse_age_groups()
pop_all <- pop |>
filter(age_group == "all") |>
pivot_longer(
cols = !c("age_group"), names_to = "year", values_to = "population"
)
pop_all$year <- as.integer(pop_all$year)
pop_all$population <- as.integer(pop_all$population)
ts <- births |>
filter(!is.na(month_int) & total != "...") |>
inner_join(pop_all, by = c("year")) |>
mutate(cbr = as.integer(total) / population * 1000) |>
mutate(
date = make_yearmonth(year, month_int),
moving_avg = SMA(cbr, n = 12)
) |>
select(date, cbr, moving_avg) |>
as_tsibble(index = date)
chart <- ggplot(ts, aes(x = date)) +
geom_line(aes(y = cbr), color = "black") +
geom_line(aes(y = moving_avg), color = "blue") +
labs(
title = "Total and 12-month Moving Average of Crude Birth Rate [Germany]",
subtitle = "Source: Destatis/Genesis: 12411-0005, 12612-0002",
caption = paste0(
"This rate expresses the number of live births per 1,000 people in the ",
"total population per year."
),
y = "Live Births per 1,000 population", x = "Month"
)
ggplot2::ggsave(
filename = "chart2.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# 3. General Fertility Rate (GFR):
# This rate expresses the number of live births per 1,000 women of
# childbearing age (15-49 years) per year.
pop_sex <- read_delim(
"https://apify.mortality.watch/destatis-genesis/12411-0006.csv.gz",
delim = ";",
col_names = c("year", "age_group", "m", "f", "total"),
col_types = "cciiii",
skip = 8,
locale = locale(encoding = "UTF-8")
) |>
parse_age_groups() |>
fill(year) |>
filter(!is.na(age_group))
# Cleanup
pop_sex$year <- as.integer(sub(".*(.{4})$", "\\1", pop_sex$year))
pop_f_15_49 <- pop_sex |>
filter(age_group %in% 15:49) |>
select(year, f) |>
group_by(year) |>
summarize(f = sum(f))
ts <- births |>
filter(!is.na(month_int) & total != "...") |>
inner_join(pop_f_15_49, by = c("year")) |>
mutate(gfr = as.integer(total) / f.y * 1000) |>
mutate(
date = make_yearmonth(year, month_int),
moving_avg = SMA(gfr, n = 12)
) |>
select(date, gfr, moving_avg) |>
as_tsibble(index = date)
chart <- ggplot(ts, aes(x = date)) +
geom_line(aes(y = gfr), color = "black") +
geom_line(aes(y = moving_avg), color = "blue") +
labs(
title =
"Total and 12-month Moving Average of General Fertility Rate [Germany]",
subtitle = paste(
"Women of childbearing age (15-49 years)",
"Source: Destatis/Genesis: 12612-0002, 12411-0006",
sep = " · "
),
caption = paste0(
"This rate expresses the number of live births per 1,000 women of ",
"childbearing age (15-49 years) per year."
),
y = "Live Births live births per 1,000 women of childbearing age",
x = "Month"
)
ggplot2::ggsave(
filename = "chart3.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# 4. Age-Specific Fertility Rate (ASFR):
# This rate expresses the number of live births per 1,000 women in a specific
# age group per year.
## a) Age Specific Rates
births_age <- read_delim(
"https://apify.mortality.watch/destatis-genesis/12612-0005.csv.gz",
delim = ";",
col_names = c("year", "age_group", "1", "2", "3", "4"),
col_types = "iciiii",
skip = 8
) |>
parse_age_groups() |>
fill(year) |>
filter(age_group %in% 15:49)
births_age$births <- rowSums(births_age[, c("1", "2", "3", "4")], na.rm = TRUE)
ts <- pop_sex |>
select(year, age_group, f) |>
inner_join(births_age |> select(year, age_group, births),
by = join_by(year, age_group)
) |>
mutate(fr = as.integer(births) / f * 1000) |>
select(year, age_group, fr) |>
filter(year >= 2017)
model <- ts |>
as_tsibble(index = year, key = age_group) |>
filter(year >= 2017 & year <= 2019) |>
model(lm = TSLM(fr))
forecasts <- model |>
forecast(h = "5 years", level = 95) |>
mutate(hl = hilo(fr, level = 95)) |>
unpack_hilo(cols = hl) |>
as_tibble() |>
select(year, age_group, .mean, hl_lower, hl_upper)
# Compute fitted values for pre-2020 period
fitted_values <- model |>
augment() |>
select(year, age_group, .fitted)
# Properly join fitted values
ts_augmented <- ts |>
left_join(fitted_values, by = c("year", "age_group"))
ts_with_forecast <- ts_augmented |>
left_join(forecasts, by = c("year", "age_group"))
width <- 1920 / 2 * sf
height <- 1080 / 2 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
chart <-
ggplot(ts_with_forecast, aes(x = year)) +
geom_line(aes(y = fr), color = "red") +
geom_line(aes(y = .fitted), color = "black", linetype = "dashed") +
geom_line(aes(y = .mean), color = "black", linetype = "dashed") +
geom_ribbon(aes(ymin = hl_lower, ymax = hl_upper),
fill = "black", alpha = 0.2
) +
facet_wrap(~age_group, scales = "free") +
labs(
title = "Fertility Rate by Age [Germany]",
subtitle = paste(
"Women of childbearing age (15-49 years)",
"Source: Destatis/Genesis: 12612-0005, 12411-0006",
sep = " · "
),
caption = paste0(
"This rate expresses the number of live births per 1,000 women in a ",
"specific age group per year."
),
y = "Live Births per 1,000 women",
x = "Year"
)
ggplot2::ggsave(
filename = "chart4.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
excess <- ts_with_forecast |>
filter(year >= 2017) |>
mutate(excess = fr / (ifelse(is.na(.mean), .fitted, .mean)) - 1)
chart <- ggplot(excess, aes(x = year)) +
geom_col(aes(y = excess), fill = "red") +
labs(
title = "Change in Fertility Rate by Age [Germany]",
subtitle = paste(
"Women of childbearing age (15-49 years)",
"Source: Destatis/Genesis: 12612-0005, 12411-0006",
sep = " · "
),
caption = paste0(
"This rate expresses the percentage change compared to the baseline ",
"2017-'19 of live births per 1,000 women in a ",
"specific age group per year."
),
y = "Change in Live Births per 1,000 women",
x = "Year"
) +
scale_y_continuous(labels = scales::percent_format()) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~age_group, scales = "free")
ggplot2::ggsave(
filename = "chart5.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
# b) age standardized rate
pop_2020 <- pop_sex |> filter(year == 2020, age_group %in% 15:49)
weights <- pop_2020 |>
mutate(weight = f / sum(pop_2020$f)) |>
select(age_group, weight)
ts <- ts |>
left_join(weights, by = join_by(age_group), relationship = "many-to-many") |>
mutate(fr = fr * weight) |>
group_by(year) |>
summarize(fr = sum(fr))
width <- 600 * sf
height <- 335 * sf
chart <- ggplot(ts, aes(x = year)) +
geom_line(aes(y = fr), color = "black") +
ylim(0, 200) +
labs(
title = "Age Standardized Fertility Rate by Age [Germany]",
subtitle = paste(
"Women of childbearing age (15-49 years)",
"Source: Destatis/Genesis: 12612-0005, 12411-0006",
sep = " · "
),
caption = paste0(
"This rate expresses the number of live births per 1,000 women if the ",
"population had the same proportions as in 2020."
),
y = "Live Births per 1,000 women",
x = "Year"
)
ggplot2::ggsave(
filename = "chart6.png", plot = chart, width = width, height = height,
units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
)
@USMortality
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment