Last active
November 27, 2024 19:09
-
-
Save USMortality/248b4beed3d8688d63c41acdeb4c2f85 to your computer and use it in GitHub Desktop.
Pull Forward Effect in Large Mortality Event [Bergamo, Italy] n=10
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tsibble) | |
library(fable) | |
library(ggplot2) | |
library(tidyverse) | |
library(data.table) | |
sf <- 2 | |
width <- 600 * sf | |
height <- 335 * sf | |
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf)) | |
# Source: http://dati.istat.it; Bergamo 2019 | |
population_bergamo_single_ages <- c( | |
8448, 9068, 9406, 9530, 10015, 10300, 10772, 11215, 11435, 11805, 11675, | |
11540, 11622, 11385, 11388, 11443, 11267, 11344, 11572, 11302, 11463, 11393, | |
11030, 11005, 10834, 10903, 11282, 11360, 11539, 11441, 12060, 11774, 11695, | |
12277, 12793, 13000, 13353, 13665, 14090, 14388, 15646, 15802, 16466, 17635, | |
17945, 18015, 18109, 17835, 18121, 18315, 18380, 18380, 18970, 18781, 18939, | |
17530, 16615, 15836, 15640, 15132, 14602, 13681, 13683, 13266, 12951, 12636, | |
12318, 12268, 12991, 12815, 12767, 11820, 12109, 9073, 9548, 9218, 9495, | |
9125, 9954, 9161, 8752, 7529, 6500, 6533, 5798, 5252, 4848, 4197, 3897, 3146, | |
2634, 2249, 1662, 1363, 1105, 787, 489, 428, 244, 121, 175 | |
) | |
# Lifetable | |
death_probability_single_age <- c( | |
3.38721, 0.15683, 0.14743, 0.13804, 0.12864, 0.10225, 0.09685, 0.09145, | |
0.08604, 0.08064, 0.0613, 0.05685, 0.0524, 0.06171, 0.08478, 0.14544, | |
0.17649, 0.20754, 0.24075, 0.2761, 0.3232, 0.3605, 0.39778, 0.4076, 0.38999, | |
0.31309, 0.30039, 0.28769, 0.29181, 0.31274, 0.36298, 0.3856, 0.40821, | |
0.43583, 0.46846, 0.49971, 0.53223, 0.56475, 0.60691, 0.65872, 0.70462, | |
0.75604, 0.80746, 0.88455, 0.98732, 1.09855, 1.20234, 1.30612, 1.4506, | |
1.63575, 1.85266, 2.04142, 2.23008, 2.44738, 2.69334, 2.89516, 3.13729, | |
3.37928, 3.70303, 4.10843, 4.52209, 4.92773, 5.33301, 5.86847, 6.53325, | |
7.10771, 7.76423, 8.41774, 9.41929, 10.769, 12.27003, 13.63954, 15.00609, | |
16.89305, 19.29446, 21.49802, 23.8437, 26.16875, 29.66179, 34.32709, | |
39.11341, 43.79693, 48.46659, 55.39584, 64.56486, 73.98942, 83.07488, | |
92.09783, 104.93058, 121.5472, 141.31298, 158.04674, 174.63864, 193.16815, | |
213.631, 231.45606, 251.41878, 271.22361, 294.79518, 322.15583, 351.36235, | |
378.59252, 405.70656, 435.07719, 466.76579, 500.7487, 532.45002, 564.13332, | |
594.77679, 624.47408, 655.04777, 684.91851, 714.81042, 741.177, 764.04937, | |
786.2723, 809.16477, 832.00764, 850.96336, 866.06502 | |
) / 1000 | |
init_pop <- function(population_by_age) { | |
population <- tibble( | |
id = integer(0), age = integer(0), health_risk = numeric(0) | |
) | |
for (age in seq_along(population_by_age)) { | |
count <- population_by_age[age] # Number of individuals for this age | |
health_risk <- runif(count) # Random health risk values | |
population <- rbind(population, tibble( | |
id = seq(nrow(population) + 1, nrow(population) + count), | |
age = rep(age - 1, count) + runif(count, min = 0, max = 51 / 52), | |
age_group = cut( | |
round(age), | |
breaks = seq(0, 120, by = 20), | |
labels = c("0-19", "20-39", "40-59", "60-79", "80-99", "100+"), | |
include.lowest = TRUE # Include the lowest value in the first interval | |
), | |
health_risk = health_risk, | |
health_status = cut( | |
health_risk, | |
breaks = seq(0, 1, by = 0.2), | |
labels = c("Very Good", "Good", "Fair", "Poor", "Very Poor"), | |
include.lowest = TRUE # Include the lowest value in the first interval | |
) | |
)) | |
} | |
population | |
} | |
simulate_week <- function(week, population, high_mortality_factor = 1) { | |
mortality_prob <- weekly_mortality_risks[population$age + 1] | |
adjusted_mortality_prob <- | |
mortality_prob * (1 - population$health_risk) + | |
(mortality_prob * population$health_risk) * high_mortality_factor | |
deaths <- rbinom(nrow(population), 1, adjusted_mortality_prob) | |
death_ids <- population$id[deaths == 1] | |
deaths <- population[population$id %in% death_ids, ] | |
deaths$week <- week | |
population <- population[!population$id %in% death_ids, ] | |
population$age <- population$age + 1 / weeks_per_year | |
list(population = population, deaths = deaths |> | |
select(week, age_group, health_status)) | |
} | |
ages <- seq_along(population_bergamo_single_ages) - 1 | |
population <- init_pop(population_bergamo_single_ages) | |
weeks_per_year <- 52 | |
simulation_duration_weeks <- 3 * weeks_per_year | |
weekly_mortality_risks <- | |
1 - (1 - death_probability_single_age)^(1 / weeks_per_year) | |
# High mortality event effect profile | |
euro <- fread(paste0( | |
"https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/", | |
"demo_r_mwk3_t?format=TSV" | |
)) | |
x <- unlist(euro[euro[[1]] %like% "ITC46"])[-1] | |
x <- x[x != ":"] | |
d <- data.table( | |
year = as.integer(substr(names(x), 1, 4)), | |
week = as.integer(substr(names(x), 7, 8)) | |
)[, x := 1:.N] | |
d$dead <- as.integer(sub(" .*", "", x)) | |
d$trend <- d[year %in% 2013:2019, predict(lm(dead ~ x), d)] | |
d <- merge(d, d[year %in% 2013:2019, .(weekly = mean(dead - trend)), week]) | |
high_mortality_factors <- | |
d[year == 2020 & week <= 26, dead / (trend + weekly)][9:19] | |
high_mortality_start_week <- 1 * weeks_per_year + 1 | |
high_mortality_weeks <- high_mortality_start_week:( | |
high_mortality_start_week + length(high_mortality_factors) - 1) | |
sim1 <- tibble() | |
sim2 <- tibble() | |
population_before <- population | |
for (sim in 1:10) { | |
set.seed(sim) | |
population <- population_before | |
for (week in 1:simulation_duration_weeks) { | |
r <- simulate_week(week, population, 1) | |
population <- r$population | |
sim1 <- rbind(sim1, r$deaths) | |
} | |
set.seed(sim) | |
population <- population_before | |
for (week in 1:simulation_duration_weeks) { | |
high_mortality_factor <- ifelse( | |
week %in% high_mortality_weeks, | |
high_mortality_factors[week - min(high_mortality_weeks) + 1], 1 | |
) | |
r <- simulate_week(week, population, high_mortality_factor) | |
population <- r$population | |
sim2 <- rbind(sim2, r$deaths) | |
} | |
} | |
# Prepare data for plotting | |
result <- sim1 |> | |
group_by(week) |> | |
summarize(deaths1 = n() / 10, .groups = "drop") |> | |
as_tsibble(index = week) |> | |
inner_join( | |
sim2 |> | |
group_by(week) |> | |
summarize(deaths2 = n() / 10, .groups = "drop") |> | |
as_tsibble(index = week), | |
by = join_by(week) | |
) | |
# Fit baseline model | |
baseline_model <- result |> | |
filter(week < high_mortality_start_week) |> | |
model(TSLM(deaths1 ~ trend())) | |
forecast_data <- baseline_model |> | |
forecast(h = simulation_duration_weeks - high_mortality_start_week) |> | |
fabletools::hilo(95) |> # Add 95% prediction interval | |
fabletools::unpack_hilo(cols = "95%") | |
# Extract fitted values from the baseline model | |
fitted_values <- baseline_model %>% | |
augment() %>% | |
select(week, .mean = .fitted) | |
# Combine original data and forecasts with prediction intervals | |
weekly_deaths <- result |> | |
left_join(fitted_values, by = "week") %>% | |
left_join( | |
forecast_data |> | |
as_tibble() |> | |
select(week, .mean, `95%_lower`, `95%_upper`), # Include prediction bounds | |
by = "week" | |
) |> | |
mutate(.mean = ifelse(is.na(.mean.x), .mean.y, .mean.x)) |> | |
mutate( | |
sma_a = slider::slide_dbl(deaths1, mean, .before = 12, .complete = TRUE), | |
sma_b = slider::slide_dbl(deaths2, mean, .before = 12, .complete = TRUE) | |
) | |
# Plot with trendline and prediction intervals | |
chart1 <- ggplot(weekly_deaths, aes(x = week)) + | |
geom_line(aes(y = deaths2), color = "red", linetype = "dotted") + | |
geom_line(aes(y = deaths1), color = "black", linetype = "dotted") + | |
geom_line(aes(y = sma_b), color = "red") + | |
geom_line(aes(y = sma_a), color = "black") + | |
geom_line(aes(y = .mean), color = "blue", linetype = "dashed") + | |
# geom_line(aes(y = deaths2), color = "red") + | |
# geom_line(aes(y = deaths1), color = "black") + | |
geom_line(aes(y = .mean), color = "blue", linetype = "dashed") + | |
geom_ribbon(aes(ymin = `95%_lower`, ymax = `95%_upper`), | |
fill = "blue", alpha = 0.2 | |
) + | |
labs( | |
title = "Weekly Deaths Over 5 Years with High Mortality Event [Simulation]", | |
subtitle = paste0( | |
"Population: ", formatC(nrow(population), format = "d", big.mark = ","), | |
", High Mortality Event Starting Week 100", | |
", Death Risk & Population from Bergamo, Italy" | |
), | |
caption = "dotted = weekly · solid = 13w SMA · Mean of 10 Simulations", | |
x = "Week", | |
y = "Deaths" | |
) | |
chart2 <- chart1 + coord_cartesian(ylim = c(150, 300)) | |
# Calculate yearly shift | |
weekly_deaths |> | |
as_tibble() |> | |
filter(!week %in% high_mortality_weeks) |> | |
mutate(year = floor((week - 1) / weeks_per_year)) |> | |
select(year, deaths1, deaths2) |> | |
group_by(year) |> | |
summarize( | |
deaths1 = sum(deaths1), | |
deaths2 = sum(deaths2), | |
ratio = deaths1 / deaths2 - 1, | |
.groups = "drop" | |
) | |
# Plot population | |
chart3 <- bind_rows( | |
population_before |> | |
group_by(age) |> | |
summarize(n = n(), .groups = "drop") |> | |
mutate(age = floor(age), status = "Before"), | |
population |> | |
group_by(age) |> | |
summarize(n = n(), .groups = "drop") |> | |
mutate(age = floor(age), status = "After") | |
) |> | |
ggplot(aes(x = age, y = n, color = status)) + | |
geom_line() + | |
labs( | |
title = "Population Before and After Simulation", | |
x = "Age", | |
y = "Count", | |
color = "Status" | |
) | |
chart4 <- sim2 |> | |
group_by(week, age_group) |> | |
summarize(deaths = n(), .groups = "drop") |> | |
ggplot(aes(x = week, y = deaths)) + | |
geom_line() + | |
facet_wrap(vars(age_group), scales = "free") + | |
labs(title = "Deaths by Age Group") | |
chart5 <- sim2 |> | |
group_by(week, health_status) |> | |
summarize(deaths = n(), .groups = "drop") |> | |
ggplot(aes(x = week, y = deaths)) + | |
geom_line() + | |
facet_wrap(vars(health_status), scales = "free") + | |
labs(title = "Deaths by Health Status") | |
charts <- list(chart1, chart2, chart3, chart4, chart5) | |
# Loop to save each chart | |
for (i in seq_along(charts)) { | |
ggsave( | |
filename = paste0("chart", i, ".png"), plot = charts[[i]], | |
width = 8, height = 4, units = "in", dpi = 300 | |
) | |
} | |
# source("/Users/ben/Downloads/1.r") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment