Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active November 27, 2024 19:09
Show Gist options
  • Save USMortality/248b4beed3d8688d63c41acdeb4c2f85 to your computer and use it in GitHub Desktop.
Save USMortality/248b4beed3d8688d63c41acdeb4c2f85 to your computer and use it in GitHub Desktop.
Pull Forward Effect in Large Mortality Event [Bergamo, Italy] n=10
library(tsibble)
library(fable)
library(ggplot2)
library(tidyverse)
library(data.table)
sf <- 2
width <- 600 * sf
height <- 335 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
# Source: http://dati.istat.it; Bergamo 2019
population_bergamo_single_ages <- c(
8448, 9068, 9406, 9530, 10015, 10300, 10772, 11215, 11435, 11805, 11675,
11540, 11622, 11385, 11388, 11443, 11267, 11344, 11572, 11302, 11463, 11393,
11030, 11005, 10834, 10903, 11282, 11360, 11539, 11441, 12060, 11774, 11695,
12277, 12793, 13000, 13353, 13665, 14090, 14388, 15646, 15802, 16466, 17635,
17945, 18015, 18109, 17835, 18121, 18315, 18380, 18380, 18970, 18781, 18939,
17530, 16615, 15836, 15640, 15132, 14602, 13681, 13683, 13266, 12951, 12636,
12318, 12268, 12991, 12815, 12767, 11820, 12109, 9073, 9548, 9218, 9495,
9125, 9954, 9161, 8752, 7529, 6500, 6533, 5798, 5252, 4848, 4197, 3897, 3146,
2634, 2249, 1662, 1363, 1105, 787, 489, 428, 244, 121, 175
)
# Lifetable
death_probability_single_age <- c(
3.38721, 0.15683, 0.14743, 0.13804, 0.12864, 0.10225, 0.09685, 0.09145,
0.08604, 0.08064, 0.0613, 0.05685, 0.0524, 0.06171, 0.08478, 0.14544,
0.17649, 0.20754, 0.24075, 0.2761, 0.3232, 0.3605, 0.39778, 0.4076, 0.38999,
0.31309, 0.30039, 0.28769, 0.29181, 0.31274, 0.36298, 0.3856, 0.40821,
0.43583, 0.46846, 0.49971, 0.53223, 0.56475, 0.60691, 0.65872, 0.70462,
0.75604, 0.80746, 0.88455, 0.98732, 1.09855, 1.20234, 1.30612, 1.4506,
1.63575, 1.85266, 2.04142, 2.23008, 2.44738, 2.69334, 2.89516, 3.13729,
3.37928, 3.70303, 4.10843, 4.52209, 4.92773, 5.33301, 5.86847, 6.53325,
7.10771, 7.76423, 8.41774, 9.41929, 10.769, 12.27003, 13.63954, 15.00609,
16.89305, 19.29446, 21.49802, 23.8437, 26.16875, 29.66179, 34.32709,
39.11341, 43.79693, 48.46659, 55.39584, 64.56486, 73.98942, 83.07488,
92.09783, 104.93058, 121.5472, 141.31298, 158.04674, 174.63864, 193.16815,
213.631, 231.45606, 251.41878, 271.22361, 294.79518, 322.15583, 351.36235,
378.59252, 405.70656, 435.07719, 466.76579, 500.7487, 532.45002, 564.13332,
594.77679, 624.47408, 655.04777, 684.91851, 714.81042, 741.177, 764.04937,
786.2723, 809.16477, 832.00764, 850.96336, 866.06502
) / 1000
init_pop <- function(population_by_age) {
population <- tibble(
id = integer(0), age = integer(0), health_risk = numeric(0)
)
for (age in seq_along(population_by_age)) {
count <- population_by_age[age] # Number of individuals for this age
health_risk <- runif(count) # Random health risk values
population <- rbind(population, tibble(
id = seq(nrow(population) + 1, nrow(population) + count),
age = rep(age - 1, count) + runif(count, min = 0, max = 51 / 52),
age_group = cut(
round(age),
breaks = seq(0, 120, by = 20),
labels = c("0-19", "20-39", "40-59", "60-79", "80-99", "100+"),
include.lowest = TRUE # Include the lowest value in the first interval
),
health_risk = health_risk,
health_status = cut(
health_risk,
breaks = seq(0, 1, by = 0.2),
labels = c("Very Good", "Good", "Fair", "Poor", "Very Poor"),
include.lowest = TRUE # Include the lowest value in the first interval
)
))
}
population
}
simulate_week <- function(week, population, high_mortality_factor = 1) {
mortality_prob <- weekly_mortality_risks[population$age + 1]
adjusted_mortality_prob <-
mortality_prob * (1 - population$health_risk) +
(mortality_prob * population$health_risk) * high_mortality_factor
deaths <- rbinom(nrow(population), 1, adjusted_mortality_prob)
death_ids <- population$id[deaths == 1]
deaths <- population[population$id %in% death_ids, ]
deaths$week <- week
population <- population[!population$id %in% death_ids, ]
population$age <- population$age + 1 / weeks_per_year
list(population = population, deaths = deaths |>
select(week, age_group, health_status))
}
ages <- seq_along(population_bergamo_single_ages) - 1
population <- init_pop(population_bergamo_single_ages)
weeks_per_year <- 52
simulation_duration_weeks <- 3 * weeks_per_year
weekly_mortality_risks <-
1 - (1 - death_probability_single_age)^(1 / weeks_per_year)
# High mortality event effect profile
euro <- fread(paste0(
"https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/",
"demo_r_mwk3_t?format=TSV"
))
x <- unlist(euro[euro[[1]] %like% "ITC46"])[-1]
x <- x[x != ":"]
d <- data.table(
year = as.integer(substr(names(x), 1, 4)),
week = as.integer(substr(names(x), 7, 8))
)[, x := 1:.N]
d$dead <- as.integer(sub(" .*", "", x))
d$trend <- d[year %in% 2013:2019, predict(lm(dead ~ x), d)]
d <- merge(d, d[year %in% 2013:2019, .(weekly = mean(dead - trend)), week])
high_mortality_factors <-
d[year == 2020 & week <= 26, dead / (trend + weekly)][9:19]
high_mortality_start_week <- 1 * weeks_per_year + 1
high_mortality_weeks <- high_mortality_start_week:(
high_mortality_start_week + length(high_mortality_factors) - 1)
sim1 <- tibble()
sim2 <- tibble()
population_before <- population
for (sim in 1:10) {
set.seed(sim)
population <- population_before
for (week in 1:simulation_duration_weeks) {
r <- simulate_week(week, population, 1)
population <- r$population
sim1 <- rbind(sim1, r$deaths)
}
set.seed(sim)
population <- population_before
for (week in 1:simulation_duration_weeks) {
high_mortality_factor <- ifelse(
week %in% high_mortality_weeks,
high_mortality_factors[week - min(high_mortality_weeks) + 1], 1
)
r <- simulate_week(week, population, high_mortality_factor)
population <- r$population
sim2 <- rbind(sim2, r$deaths)
}
}
# Prepare data for plotting
result <- sim1 |>
group_by(week) |>
summarize(deaths1 = n() / 10, .groups = "drop") |>
as_tsibble(index = week) |>
inner_join(
sim2 |>
group_by(week) |>
summarize(deaths2 = n() / 10, .groups = "drop") |>
as_tsibble(index = week),
by = join_by(week)
)
# Fit baseline model
baseline_model <- result |>
filter(week < high_mortality_start_week) |>
model(TSLM(deaths1 ~ trend()))
forecast_data <- baseline_model |>
forecast(h = simulation_duration_weeks - high_mortality_start_week) |>
fabletools::hilo(95) |> # Add 95% prediction interval
fabletools::unpack_hilo(cols = "95%")
# Extract fitted values from the baseline model
fitted_values <- baseline_model %>%
augment() %>%
select(week, .mean = .fitted)
# Combine original data and forecasts with prediction intervals
weekly_deaths <- result |>
left_join(fitted_values, by = "week") %>%
left_join(
forecast_data |>
as_tibble() |>
select(week, .mean, `95%_lower`, `95%_upper`), # Include prediction bounds
by = "week"
) |>
mutate(.mean = ifelse(is.na(.mean.x), .mean.y, .mean.x)) |>
mutate(
sma_a = slider::slide_dbl(deaths1, mean, .before = 12, .complete = TRUE),
sma_b = slider::slide_dbl(deaths2, mean, .before = 12, .complete = TRUE)
)
# Plot with trendline and prediction intervals
chart1 <- ggplot(weekly_deaths, aes(x = week)) +
geom_line(aes(y = deaths2), color = "red", linetype = "dotted") +
geom_line(aes(y = deaths1), color = "black", linetype = "dotted") +
geom_line(aes(y = sma_b), color = "red") +
geom_line(aes(y = sma_a), color = "black") +
geom_line(aes(y = .mean), color = "blue", linetype = "dashed") +
# geom_line(aes(y = deaths2), color = "red") +
# geom_line(aes(y = deaths1), color = "black") +
geom_line(aes(y = .mean), color = "blue", linetype = "dashed") +
geom_ribbon(aes(ymin = `95%_lower`, ymax = `95%_upper`),
fill = "blue", alpha = 0.2
) +
labs(
title = "Weekly Deaths Over 5 Years with High Mortality Event [Simulation]",
subtitle = paste0(
"Population: ", formatC(nrow(population), format = "d", big.mark = ","),
", High Mortality Event Starting Week 100",
", Death Risk & Population from Bergamo, Italy"
),
caption = "dotted = weekly · solid = 13w SMA · Mean of 10 Simulations",
x = "Week",
y = "Deaths"
)
chart2 <- chart1 + coord_cartesian(ylim = c(150, 300))
# Calculate yearly shift
weekly_deaths |>
as_tibble() |>
filter(!week %in% high_mortality_weeks) |>
mutate(year = floor((week - 1) / weeks_per_year)) |>
select(year, deaths1, deaths2) |>
group_by(year) |>
summarize(
deaths1 = sum(deaths1),
deaths2 = sum(deaths2),
ratio = deaths1 / deaths2 - 1,
.groups = "drop"
)
# Plot population
chart3 <- bind_rows(
population_before |>
group_by(age) |>
summarize(n = n(), .groups = "drop") |>
mutate(age = floor(age), status = "Before"),
population |>
group_by(age) |>
summarize(n = n(), .groups = "drop") |>
mutate(age = floor(age), status = "After")
) |>
ggplot(aes(x = age, y = n, color = status)) +
geom_line() +
labs(
title = "Population Before and After Simulation",
x = "Age",
y = "Count",
color = "Status"
)
chart4 <- sim2 |>
group_by(week, age_group) |>
summarize(deaths = n(), .groups = "drop") |>
ggplot(aes(x = week, y = deaths)) +
geom_line() +
facet_wrap(vars(age_group), scales = "free") +
labs(title = "Deaths by Age Group")
chart5 <- sim2 |>
group_by(week, health_status) |>
summarize(deaths = n(), .groups = "drop") |>
ggplot(aes(x = week, y = deaths)) +
geom_line() +
facet_wrap(vars(health_status), scales = "free") +
labs(title = "Deaths by Health Status")
charts <- list(chart1, chart2, chart3, chart4, chart5)
# Loop to save each chart
for (i in seq_along(charts)) {
ggsave(
filename = paste0("chart", i, ".png"), plot = charts[[i]],
width = 8, height = 4, units = "in", dpi = 300
)
}
# source("/Users/ben/Downloads/1.r")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment