USMortality · November 27, 2024 19:09
diff --git a/.chart_bergamo_pull_forward2.r b/.chart_bergamo_pull_forward2.r
 library(tsibble)
 library(fable)
 library(ggplot2)
 library(tidyverse)
 library(data.table)

 sf <- 2
 width <- 600 * sf
 height <- 335 * sf
 options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))

 # Source: http://dati.istat.it; Bergamo 2019
 population_bergamo_single_ages <- c(
  8448, 9068, 9406, 9530, 10015, 10300, 10772, 11215, 11435, 11805, 11675,
  11540, 11622, 11385, 11388, 11443, 11267, 11344, 11572, 11302, 11463, 11393,
  11030, 11005, 10834, 10903, 11282, 11360, 11539, 11441, 12060, 11774, 11695,
  12277, 12793, 13000, 13353, 13665, 14090, 14388, 15646, 15802, 16466, 17635,
  17945, 18015, 18109, 17835, 18121, 18315, 18380, 18380, 18970, 18781, 18939,
  17530, 16615, 15836, 15640, 15132, 14602, 13681, 13683, 13266, 12951, 12636,
  12318, 12268, 12991, 12815, 12767, 11820, 12109, 9073, 9548, 9218, 9495,
  9125, 9954, 9161, 8752, 7529, 6500, 6533, 5798, 5252, 4848, 4197, 3897, 3146,
  2634, 2249, 1662, 1363, 1105, 787, 489, 428, 244, 121, 175
 )
 # Lifetable
 death_probability_single_age <- c(
  3.38721, 0.15683, 0.14743, 0.13804, 0.12864, 0.10225, 0.09685, 0.09145,
  0.08604, 0.08064, 0.0613, 0.05685, 0.0524, 0.06171, 0.08478, 0.14544,
  0.17649, 0.20754, 0.24075, 0.2761, 0.3232, 0.3605, 0.39778, 0.4076, 0.38999,
  0.31309, 0.30039, 0.28769, 0.29181, 0.31274, 0.36298, 0.3856, 0.40821,
  0.43583, 0.46846, 0.49971, 0.53223, 0.56475, 0.60691, 0.65872, 0.70462,
  0.75604, 0.80746, 0.88455, 0.98732, 1.09855, 1.20234, 1.30612, 1.4506,
  1.63575, 1.85266, 2.04142, 2.23008, 2.44738, 2.69334, 2.89516, 3.13729,
  3.37928, 3.70303, 4.10843, 4.52209, 4.92773, 5.33301, 5.86847, 6.53325,
  7.10771, 7.76423, 8.41774, 9.41929, 10.769, 12.27003, 13.63954, 15.00609,
  16.89305, 19.29446, 21.49802, 23.8437, 26.16875, 29.66179, 34.32709,
  39.11341, 43.79693, 48.46659, 55.39584, 64.56486, 73.98942, 83.07488,
  92.09783, 104.93058, 121.5472, 141.31298, 158.04674, 174.63864, 193.16815,
  213.631, 231.45606, 251.41878, 271.22361, 294.79518, 322.15583, 351.36235,
  378.59252, 405.70656, 435.07719, 466.76579, 500.7487, 532.45002, 564.13332,
  594.77679, 624.47408, 655.04777, 684.91851, 714.81042, 741.177, 764.04937,
  786.2723, 809.16477, 832.00764, 850.96336, 866.06502
 ) / 1000

 init_pop <- function(population_by_age) {
  population <- tibble(
    id = integer(0), age = integer(0), health_risk = numeric(0)
  )

  for (age in seq_along(population_by_age)) {
    count <- population_by_age[age] # Number of individuals for this age
    health_risk <- runif(count) # Random health risk values
    population <- rbind(population, tibble(
      id = seq(nrow(population) + 1, nrow(population) + count),
      age = rep(age - 1, count) + runif(count, min = 0, max = 51 / 52),
      age_group = cut(
        round(age),
        breaks = seq(0, 120, by = 20),
        labels = c("0-19", "20-39", "40-59", "60-79", "80-99", "100+"),
        include.lowest = TRUE # Include the lowest value in the first interval
      ),
      health_risk = health_risk,
      health_status = cut(
        health_risk,
        breaks = seq(0, 1, by = 0.2),
        labels = c("Very Good", "Good", "Fair", "Poor", "Very Poor"),
        include.lowest = TRUE # Include the lowest value in the first interval
      )
    ))
  }

  population
 }

 simulate_week <- function(week, population, high_mortality_factor = 1) {
  mortality_prob <- weekly_mortality_risks[population$age + 1]
  adjusted_mortality_prob <-
    mortality_prob * (1 - population$health_risk) +
    (mortality_prob * population$health_risk) * high_mortality_factor
  deaths <- rbinom(nrow(population), 1, adjusted_mortality_prob)
  death_ids <- population$id[deaths == 1]
  deaths <- population[population$id %in% death_ids, ]
  deaths$week <- week
  population <- population[!population$id %in% death_ids, ]
  population$age <- population$age + 1 / weeks_per_year
  list(population = population, deaths = deaths |>
    select(week, age_group, health_status))
 }

 ages <- seq_along(population_bergamo_single_ages) - 1
 population <- init_pop(population_bergamo_single_ages)
 weeks_per_year <- 52

 simulation_duration_weeks <- 3 * weeks_per_year
 weekly_mortality_risks <-
  1 - (1 - death_probability_single_age)^(1 / weeks_per_year)

 # High mortality event effect profile
 euro <- fread(paste0(
  "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/",
  "demo_r_mwk3_t?format=TSV"
 ))
 x <- unlist(euro[euro[[1]] %like% "ITC46"])[-1]
 x <- x[x != ":"]
 d <- data.table(
  year = as.integer(substr(names(x), 1, 4)),
  week = as.integer(substr(names(x), 7, 8))
 )[, x := 1:.N]
 d$dead <- as.integer(sub(" .*", "", x))
 d$trend <- d[year %in% 2013:2019, predict(lm(dead ~ x), d)]
 d <- merge(d, d[year %in% 2013:2019, .(weekly = mean(dead - trend)), week])
 high_mortality_factors <-
  d[year == 2020 & week <= 26, dead / (trend + weekly)][9:19]
 high_mortality_start_week <- 1 * weeks_per_year + 1
 high_mortality_weeks <- high_mortality_start_week:(
  high_mortality_start_week + length(high_mortality_factors) - 1)

 sim1 <- tibble()
 sim2 <- tibble()
 population_before <- population
 for (sim in 1:10) {
  set.seed(sim)
  population <- population_before

  for (week in 1:simulation_duration_weeks) {
    r <- simulate_week(week, population, 1)
    population <- r$population
    sim1 <- rbind(sim1, r$deaths)
  }

  set.seed(sim)
  population <- population_before
  for (week in 1:simulation_duration_weeks) {
    high_mortality_factor <- ifelse(
      week %in% high_mortality_weeks,
      high_mortality_factors[week - min(high_mortality_weeks) + 1], 1
    )
    r <- simulate_week(week, population, high_mortality_factor)
    population <- r$population
    sim2 <- rbind(sim2, r$deaths)
  }
 }

 # Prepare data for plotting
 result <- sim1 |>
  group_by(week) |>
  summarize(deaths1 = n() / 10, .groups = "drop") |>
  as_tsibble(index = week) |>
  inner_join(
    sim2 |>
      group_by(week) |>
      summarize(deaths2 = n() / 10, .groups = "drop") |>
      as_tsibble(index = week),
    by = join_by(week)
  )

 # Fit baseline model
 baseline_model <- result |>
  filter(week < high_mortality_start_week) |>
  model(TSLM(deaths1 ~ trend()))

 forecast_data <- baseline_model |>
  forecast(h = simulation_duration_weeks - high_mortality_start_week) |>
  fabletools::hilo(95) |> # Add 95% prediction interval
  fabletools::unpack_hilo(cols = "95%")

 # Extract fitted values from the baseline model
 fitted_values <- baseline_model %>%
  augment() %>%
  select(week, .mean = .fitted)

 # Combine original data and forecasts with prediction intervals
 weekly_deaths <- result |>
  left_join(fitted_values, by = "week") %>%
  left_join(
    forecast_data |>
      as_tibble() |>
      select(week, .mean, `95%_lower`, `95%_upper`), # Include prediction bounds
    by = "week"
  ) |>
  mutate(.mean = ifelse(is.na(.mean.x), .mean.y, .mean.x)) |>
  mutate(
    sma_a = slider::slide_dbl(deaths1, mean, .before = 12, .complete = TRUE),
    sma_b = slider::slide_dbl(deaths2, mean, .before = 12, .complete = TRUE)
  )

 # Plot with trendline and prediction intervals
 chart1 <- ggplot(weekly_deaths, aes(x = week)) +
  geom_line(aes(y = deaths2), color = "red", linetype = "dotted") +
  geom_line(aes(y = deaths1), color = "black", linetype = "dotted") +
  geom_line(aes(y = sma_b), color = "red") +
  geom_line(aes(y = sma_a), color = "black") +
  geom_line(aes(y = .mean), color = "blue", linetype = "dashed") +
  # geom_line(aes(y = deaths2), color = "red") +
  # geom_line(aes(y = deaths1), color = "black") +
  geom_line(aes(y = .mean), color = "blue", linetype = "dashed") +
  geom_ribbon(aes(ymin = `95%_lower`, ymax = `95%_upper`),
    fill = "blue", alpha = 0.2
  ) +
  labs(
    title = "Weekly Deaths Over 5 Years with High Mortality Event [Simulation]",
    subtitle = paste0(
      "Population: ", formatC(nrow(population), format = "d", big.mark = ","),
      ", High Mortality Event Starting Week 100",
      ", Death Risk & Population from Bergamo, Italy"
    ),
    caption = "dotted = weekly · solid = 13w SMA · Mean of 10 Simulations",
    x = "Week",
    y = "Deaths"
  )

 chart2 <- chart1 + coord_cartesian(ylim = c(150, 300))

 # Calculate yearly shift
 weekly_deaths |>
  as_tibble() |>
  filter(!week %in% high_mortality_weeks) |>
  mutate(year = floor((week - 1) / weeks_per_year)) |>
  select(year, deaths1, deaths2) |>
  group_by(year) |>
  summarize(
    deaths1 = sum(deaths1),
    deaths2 = sum(deaths2),
    ratio = deaths1 / deaths2 - 1,
    .groups = "drop"
  )

 # Plot population
 chart3 <- bind_rows(
  population_before |>
    group_by(age) |>
    summarize(n = n(), .groups = "drop") |>
    mutate(age = floor(age), status = "Before"),
  population |>
    group_by(age) |>
    summarize(n = n(), .groups = "drop") |>
    mutate(age = floor(age), status = "After")
 ) |>
  ggplot(aes(x = age, y = n, color = status)) +
  geom_line() +
  labs(
    title = "Population Before and After Simulation",
    x = "Age",
    y = "Count",
    color = "Status"
  )


 chart4 <- sim2 |>
  group_by(week, age_group) |>
  summarize(deaths = n(), .groups = "drop") |>
  ggplot(aes(x = week, y = deaths)) +
  geom_line() +
  facet_wrap(vars(age_group), scales = "free") +
  labs(title = "Deaths by Age Group")


 chart5 <- sim2 |>
  group_by(week, health_status) |>
  summarize(deaths = n(), .groups = "drop") |>
  ggplot(aes(x = week, y = deaths)) +
  geom_line() +
  facet_wrap(vars(health_status), scales = "free") +
  labs(title = "Deaths by Health Status")


 charts <- list(chart1, chart2, chart3, chart4, chart5)
 # Loop to save each chart
 for (i in seq_along(charts)) {
  ggsave(
    filename = paste0("chart", i, ".png"), plot = charts[[i]],
    width = 8, height = 4, units = "in", dpi = 300
  )
 }

 # source("/Users/ben/Downloads/1.r")
	library(tsibble)
	library(fable)
	library(ggplot2)
	library(tidyverse)
	library(data.table)

	sf <- 2
	width <- 600 * sf
	height <- 335 * sf
	options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))

	# Source: http://dati.istat.it; Bergamo 2019
	population_bergamo_single_ages <- c(
	8448, 9068, 9406, 9530, 10015, 10300, 10772, 11215, 11435, 11805, 11675,
	11540, 11622, 11385, 11388, 11443, 11267, 11344, 11572, 11302, 11463, 11393,
	11030, 11005, 10834, 10903, 11282, 11360, 11539, 11441, 12060, 11774, 11695,
	12277, 12793, 13000, 13353, 13665, 14090, 14388, 15646, 15802, 16466, 17635,
	17945, 18015, 18109, 17835, 18121, 18315, 18380, 18380, 18970, 18781, 18939,
	17530, 16615, 15836, 15640, 15132, 14602, 13681, 13683, 13266, 12951, 12636,
	12318, 12268, 12991, 12815, 12767, 11820, 12109, 9073, 9548, 9218, 9495,
	9125, 9954, 9161, 8752, 7529, 6500, 6533, 5798, 5252, 4848, 4197, 3897, 3146,
	2634, 2249, 1662, 1363, 1105, 787, 489, 428, 244, 121, 175
	)
	# Lifetable
	death_probability_single_age <- c(
	3.38721, 0.15683, 0.14743, 0.13804, 0.12864, 0.10225, 0.09685, 0.09145,
	0.08604, 0.08064, 0.0613, 0.05685, 0.0524, 0.06171, 0.08478, 0.14544,
	0.17649, 0.20754, 0.24075, 0.2761, 0.3232, 0.3605, 0.39778, 0.4076, 0.38999,
	0.31309, 0.30039, 0.28769, 0.29181, 0.31274, 0.36298, 0.3856, 0.40821,
	0.43583, 0.46846, 0.49971, 0.53223, 0.56475, 0.60691, 0.65872, 0.70462,
	0.75604, 0.80746, 0.88455, 0.98732, 1.09855, 1.20234, 1.30612, 1.4506,
	1.63575, 1.85266, 2.04142, 2.23008, 2.44738, 2.69334, 2.89516, 3.13729,
	3.37928, 3.70303, 4.10843, 4.52209, 4.92773, 5.33301, 5.86847, 6.53325,
	7.10771, 7.76423, 8.41774, 9.41929, 10.769, 12.27003, 13.63954, 15.00609,
	16.89305, 19.29446, 21.49802, 23.8437, 26.16875, 29.66179, 34.32709,
	39.11341, 43.79693, 48.46659, 55.39584, 64.56486, 73.98942, 83.07488,
	92.09783, 104.93058, 121.5472, 141.31298, 158.04674, 174.63864, 193.16815,
	213.631, 231.45606, 251.41878, 271.22361, 294.79518, 322.15583, 351.36235,
	378.59252, 405.70656, 435.07719, 466.76579, 500.7487, 532.45002, 564.13332,
	594.77679, 624.47408, 655.04777, 684.91851, 714.81042, 741.177, 764.04937,
	786.2723, 809.16477, 832.00764, 850.96336, 866.06502
	) / 1000

	init_pop <- function(population_by_age) {
	population <- tibble(
	id = integer(0), age = integer(0), health_risk = numeric(0)
	)

	for (age in seq_along(population_by_age)) {
	count <- population_by_age[age] # Number of individuals for this age
	health_risk <- runif(count) # Random health risk values
	population <- rbind(population, tibble(
	id = seq(nrow(population) + 1, nrow(population) + count),
	age = rep(age - 1, count) + runif(count, min = 0, max = 51 / 52),
	age_group = cut(
	round(age),
	breaks = seq(0, 120, by = 20),
	labels = c("0-19", "20-39", "40-59", "60-79", "80-99", "100+"),
	include.lowest = TRUE # Include the lowest value in the first interval
	),
	health_risk = health_risk,
	health_status = cut(
	health_risk,
	breaks = seq(0, 1, by = 0.2),
	labels = c("Very Good", "Good", "Fair", "Poor", "Very Poor"),
	include.lowest = TRUE # Include the lowest value in the first interval
	)
	))
	}

	population
	}

	simulate_week <- function(week, population, high_mortality_factor = 1) {
	mortality_prob <- weekly_mortality_risks[population$age + 1]
	adjusted_mortality_prob <-
	mortality_prob * (1 - population$health_risk) +
	(mortality_prob * population$health_risk) * high_mortality_factor
	deaths <- rbinom(nrow(population), 1, adjusted_mortality_prob)
	death_ids <- population$id[deaths == 1]
	deaths <- population[population$id %in% death_ids, ]
	deaths$week <- week
	population <- population[!population$id %in% death_ids, ]
	population$age <- population$age + 1 / weeks_per_year
	list(population = population, deaths = deaths \|>
	select(week, age_group, health_status))
	}

	ages <- seq_along(population_bergamo_single_ages) - 1
	population <- init_pop(population_bergamo_single_ages)
	weeks_per_year <- 52

	simulation_duration_weeks <- 3 * weeks_per_year
	weekly_mortality_risks <-
	1 - (1 - death_probability_single_age)^(1 / weeks_per_year)

	# High mortality event effect profile
	euro <- fread(paste0(
	"https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/",
	"demo_r_mwk3_t?format=TSV"
	))
	x <- unlist(euro[euro[[1]] %like% "ITC46"])[-1]
	x <- x[x != ":"]
	d <- data.table(
	year = as.integer(substr(names(x), 1, 4)),
	week = as.integer(substr(names(x), 7, 8))
	)[, x := 1:.N]
	d$dead <- as.integer(sub(" .*", "", x))
	d$trend <- d[year %in% 2013:2019, predict(lm(dead ~ x), d)]
	d <- merge(d, d[year %in% 2013:2019, .(weekly = mean(dead - trend)), week])
	high_mortality_factors <-
	d[year == 2020 & week <= 26, dead / (trend + weekly)][9:19]
	high_mortality_start_week <- 1 * weeks_per_year + 1
	high_mortality_weeks <- high_mortality_start_week:(
	high_mortality_start_week + length(high_mortality_factors) - 1)

	sim1 <- tibble()
	sim2 <- tibble()
	population_before <- population
	for (sim in 1:10) {
	set.seed(sim)
	population <- population_before

	for (week in 1:simulation_duration_weeks) {
	r <- simulate_week(week, population, 1)
	population <- r$population
	sim1 <- rbind(sim1, r$deaths)
	}

	set.seed(sim)
	population <- population_before
	for (week in 1:simulation_duration_weeks) {
	high_mortality_factor <- ifelse(
	week %in% high_mortality_weeks,
	high_mortality_factors[week - min(high_mortality_weeks) + 1], 1
	)
	r <- simulate_week(week, population, high_mortality_factor)
	population <- r$population
	sim2 <- rbind(sim2, r$deaths)
	}
	}

	# Prepare data for plotting
	result <- sim1 \|>
	group_by(week) \|>
	summarize(deaths1 = n() / 10, .groups = "drop") \|>
	as_tsibble(index = week) \|>
	inner_join(
	sim2 \|>
	group_by(week) \|>
	summarize(deaths2 = n() / 10, .groups = "drop") \|>
	as_tsibble(index = week),
	by = join_by(week)
	)

	# Fit baseline model
	baseline_model <- result \|>
	filter(week < high_mortality_start_week) \|>
	model(TSLM(deaths1 ~ trend()))

	forecast_data <- baseline_model \|>
	forecast(h = simulation_duration_weeks - high_mortality_start_week) \|>
	fabletools::hilo(95) \|> # Add 95% prediction interval
	fabletools::unpack_hilo(cols = "95%")

	# Extract fitted values from the baseline model
	fitted_values <- baseline_model %>%
	augment() %>%
	select(week, .mean = .fitted)

	# Combine original data and forecasts with prediction intervals
	weekly_deaths <- result \|>
	left_join(fitted_values, by = "week") %>%
	left_join(
	forecast_data \|>
	as_tibble() \|>
	select(week, .mean, `95%_lower`, `95%_upper`), # Include prediction bounds
	by = "week"
	) \|>
	mutate(.mean = ifelse(is.na(.mean.x), .mean.y, .mean.x)) \|>
	mutate(
	sma_a = slider::slide_dbl(deaths1, mean, .before = 12, .complete = TRUE),
	sma_b = slider::slide_dbl(deaths2, mean, .before = 12, .complete = TRUE)
	)

	# Plot with trendline and prediction intervals
	chart1 <- ggplot(weekly_deaths, aes(x = week)) +
	geom_line(aes(y = deaths2), color = "red", linetype = "dotted") +
	geom_line(aes(y = deaths1), color = "black", linetype = "dotted") +
	geom_line(aes(y = sma_b), color = "red") +
	geom_line(aes(y = sma_a), color = "black") +
	geom_line(aes(y = .mean), color = "blue", linetype = "dashed") +
	# geom_line(aes(y = deaths2), color = "red") +
	# geom_line(aes(y = deaths1), color = "black") +
	geom_line(aes(y = .mean), color = "blue", linetype = "dashed") +
	geom_ribbon(aes(ymin = `95%_lower`, ymax = `95%_upper`),
	fill = "blue", alpha = 0.2
	) +
	labs(
	title = "Weekly Deaths Over 5 Years with High Mortality Event [Simulation]",
	subtitle = paste0(
	"Population: ", formatC(nrow(population), format = "d", big.mark = ","),
	", High Mortality Event Starting Week 100",
	", Death Risk & Population from Bergamo, Italy"
	),
	caption = "dotted = weekly · solid = 13w SMA · Mean of 10 Simulations",
	x = "Week",
	y = "Deaths"
	)

	chart2 <- chart1 + coord_cartesian(ylim = c(150, 300))

	# Calculate yearly shift
	weekly_deaths \|>
	as_tibble() \|>
	filter(!week %in% high_mortality_weeks) \|>
	mutate(year = floor((week - 1) / weeks_per_year)) \|>
	select(year, deaths1, deaths2) \|>
	group_by(year) \|>
	summarize(
	deaths1 = sum(deaths1),
	deaths2 = sum(deaths2),
	ratio = deaths1 / deaths2 - 1,
	.groups = "drop"
	)

	# Plot population
	chart3 <- bind_rows(
	population_before \|>
	group_by(age) \|>
	summarize(n = n(), .groups = "drop") \|>
	mutate(age = floor(age), status = "Before"),
	population \|>
	group_by(age) \|>
	summarize(n = n(), .groups = "drop") \|>
	mutate(age = floor(age), status = "After")
	) \|>
	ggplot(aes(x = age, y = n, color = status)) +
	geom_line() +
	labs(
	title = "Population Before and After Simulation",
	x = "Age",
	y = "Count",
	color = "Status"
	)


	chart4 <- sim2 \|>
	group_by(week, age_group) \|>
	summarize(deaths = n(), .groups = "drop") \|>
	ggplot(aes(x = week, y = deaths)) +
	geom_line() +
	facet_wrap(vars(age_group), scales = "free") +
	labs(title = "Deaths by Age Group")


	chart5 <- sim2 \|>
	group_by(week, health_status) \|>
	summarize(deaths = n(), .groups = "drop") \|>
	ggplot(aes(x = week, y = deaths)) +
	geom_line() +
	facet_wrap(vars(health_status), scales = "free") +
	labs(title = "Deaths by Health Status")


	charts <- list(chart1, chart2, chart3, chart4, chart5)
	# Loop to save each chart
	for (i in seq_along(charts)) {
	ggsave(
	filename = paste0("chart", i, ".png"), plot = charts[[i]],
	width = 8, height = 4, units = "in", dpi = 300
	)
	}

	# source("/Users/ben/Downloads/1.r")