USMortality · November 24, 2024 01:55
diff --git a/pull_forward_sim.r b/pull_forward_sim.r
 # Load necessary libraries
 library(dplyr)
 library(ggplot2)

 sf <- 2
 width <- 600 * sf
 height <- 335 * sf

 options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))


 # Define parameters
 population_size <- 10000
 simulation_weeks <- 52 * 3
 max_age <- 120

 # 1. Use Real world
 # https://www.mortality.watch/explorer/?c=ITA&t=cmr&cs=bar&df=2019&dt=2019
 # &ag=0-9&ag=10-19&ag=20-29&ag=30-39&ag=40-49&ag=50-59&ag=60-69&ag=70-79
 # &ag=80%2B&sb=0&lg=1
 # Define the data
 age_groups <- c(
  "0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+"
 )
 mid_ages <- c(5, 15, 25, 35, 45, 55, 65, 75, 90) # Representative midpoints
 # Deaths per 100,000
 mortality_counts <- c(31, 13, 30, 46, 116, 303, 773, 2125, 9715)

 # Convert mortality counts to rates
 mortality_rates <- mortality_counts / sum(mortality_counts)

 # Create a data frame for ggplot
 data <- data.frame(
  mid_ages = mid_ages,
  mortality_rates = mortality_rates,
  mortality_counts = mortality_counts
 )

 # Perform LOESS smoothing
 loess_fit <- loess(mortality_rates ~ mid_ages, span = 0.62)
 ages <- 0:120
 predicted_risks_loess <- predict(
  loess_fit,
  newdata = data.frame(mid_ages = ages)
 )

 # Create a data frame for the smoothed predictions
 smoothed_data <- data.frame(
  ages = ages,
  predicted_risks_loess = predicted_risks_loess
 )

 # Plot with ggplot2
 chart <- ggplot(data, aes(x = mid_ages, y = mortality_rates)) +
  geom_point(color = "red", size = 3) +
  geom_line(
    data = smoothed_data,
    aes(x = ages, y = predicted_risks_loess), color = "blue", size = 1
  ) +
  scale_y_log10(labels = scales::percent_format(scale = 100)) +
  labs(
    title = "Mortality Risk by Age (Loess Fit)",
    x = "Age (Years)",
    y = "Mortality Risk (Log Scale)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "top"
  )

 ggsave(
  filename = "chart1.png", plot = chart,
  width = 8, height = 4, units = "in", dpi = 300
 )


 # 2. Fit a log-linear model, to simplify the mortality rates by age.

 # Fit a log-linear model
 log_mortality_rates <- log(mortality_rates)
 log_model <- lm(log_mortality_rates ~ mid_ages)

 # Extract coefficients
 a <- coef(log_model)[2]
 b <- coef(log_model)[1]

 # Predict using the log-linear model
 ages <- 0:120
 predicted_log_mortality <- predict(
  log_model,
  newdata = data.frame(mid_ages = ages)
 )
 predicted_mortality <- pmin(1, exp(predicted_log_mortality))

 # Create data frames for ggplot
 data <- data.frame(
  mid_ages = mid_ages,
  mortality_rates = mortality_rates
 )

 smoothed_data <- data.frame(
  ages = ages,
  predicted_mortality = predicted_mortality
 )

 # Plot with ggplot2
 chart <- ggplot(data, aes(x = mid_ages, y = mortality_rates)) +
  geom_point(color = "red", size = 3) +
  geom_line(
    data = smoothed_data, aes(x = ages, y = predicted_mortality),
    color = "blue", size = 1
  ) +
  scale_y_log10() + # Log scale for y-axis
  labs(
    title = "Mortality Risk by Age (Log-Linear Fit)",
    x = "Age (Years)",
    y = "Mortality Risk (Log Scale)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

 ggsave(
  filename = "chart2.png", plot = chart,
  width = 8, height = 4, units = "in", dpi = 300
 )


 cat(sprintf(
  "Italian mortality risk by age ≈ exp(%.4f) * exp(%.4f * age)\n", b, a
 ))


 # 3. Basic Simulation

 # Parameters
 max_age <- 120
 population_size <- 100000
 weeks_per_year <- 52
 simulation_duration_weeks <- 3 * weeks_per_year

 # Convert annual mortality rates to weekly probabilities
 ages <- 0:max_age
 survival_prob <- cumprod(1 - predicted_mortality)
 weekly_mortality_risks <- 1 - (1 - predicted_mortality)^(1 / weeks_per_year)

 # Initialize the population
 set.seed(123) # For reproducibility
 age_distribution <- survival_prob / sum(survival_prob)
 initial_ages <- sample(
  ages,
  size = population_size, replace = TRUE, prob = age_distribution
 )
 population <- data.frame(ID = 1:population_size, Age = initial_ages)

 population |>
  group_by(Age) |>
  summarize(n = n()) |>
  ggplot(aes(x = Age, y = n)) +
  geom_line()


 # Function to simulate one week
 simulate_week <- function(population, weekly_mortality_risks) {
  # Calculate individual mortality probabilities
  individual_mortality_prob <- weekly_mortality_risks[population$Age + 1]

  # Simulate deaths
  deaths <- rbinom(nrow(population), 1, individual_mortality_prob)
  death_ids <- population$ID[deaths == 1]

  # Update population: Remove those who died
  population <- population[!population$ID %in% death_ids, ]

  # Remove individuals at max_age
  population <- population[population$Age < max_age, ]

  # Age everyone by 1 week
  population$Age <- population$Age + 1 / weeks_per_year

  # Add newborns equal to the number of deaths
  num_newborns <- length(death_ids)
  if (num_newborns > 0) {
    new_children <- data.frame(
      ID = (max(population$ID) + 1):(max(population$ID) + num_newborns),
      Age = rep(0, num_newborns)
    )
    population <- rbind(population, new_children)
  }

  # Return updated population and death count
  list(population = population, deaths = length(death_ids))
 }

 # Add a seasonal factor
 seasonal_factor <- function(week) {
  1 + 0.5 * sin(2 * pi * (week / weeks_per_year))
 }

 # Simulation loop with seasonal mortality risks
 weekly_deaths_seasonal <- numeric(simulation_duration_weeks)

 for (week in 1:simulation_duration_weeks) {
  # Apply the seasonal factor to the mortality risks
  adjusted_weekly_risks <- weekly_mortality_risks * seasonal_factor(week)

  # Simulate one week
  result <- simulate_week(population, adjusted_weekly_risks)
  population <- result$population
  weekly_deaths_seasonal[week] <- result$deaths

  print(paste0(
    "Week: ", week, "; Deaths: ", weekly_deaths_seasonal[week],
    "; Population: ", nrow(population)
  ))
 }

 # Prepare data for plotting
 simulation_data_seasonal <- data.frame(
  Week = 1:simulation_duration_weeks,
  Deaths = weekly_deaths_seasonal
 )

 # Plot weekly deaths with seasonal adjustment
 chart_seasonal <- ggplot(simulation_data_seasonal, aes(x = Week, y = Deaths)) +
  geom_line(color = "blue") +
  labs(
    title = "Weekly Deaths Over 3 Years (Seasonal Adjustment)",
    x = "Week",
    y = "Number of Deaths"
  )

 # Save the seasonal chart
 ggplot2::ggsave(
  filename = "chart3.png", plot = chart_seasonal,
  width = width, height = height, units = "px", dpi = 72 * sf,
  device = grDevices::png, type = c("cairo")
 )


 # 4. Simulation

 # Parameters
 max_age <- 120
 population_size <- 100000
 weeks_per_year <- 52
 simulation_duration_weeks <- 5 * weeks_per_year

 # Example mortality rates (replace this with actual data)
 predicted_mortality <- seq(0.00001, 0.2, length.out = max_age + 1)

 # Convert annual mortality rates to weekly probabilities
 ages <- 0:max_age
 survival_prob <- cumprod(1 - predicted_mortality)
 weekly_mortality_risks <- 1 - (1 - predicted_mortality)^(1 / weeks_per_year)

 # Initialize the population
 set.seed(123) # For reproducibility
 age_distribution <- survival_prob / sum(survival_prob)
 initial_ages <- sample(
  ages,
  size = population_size, replace = TRUE, prob = age_distribution
 )
 population <- data.frame(
  ID = 1:population_size,
  Age = initial_ages,
  Health = sample(
    c(1, 2, 4, 8),
    population_size,
    replace = TRUE,
    prob = c(0.3, 0.3, 0.3, 0.1)
  )
 )

 population %>%
  group_by(Age) %>%
  summarize(n = n()) %>%
  ggplot(aes(x = Age, y = n)) +
  geom_line() +
  labs(title = "Age Distribution", x = "Age", y = "Count")

 # Function to simulate one week
 simulate_week <- function(population, seasonal_factor, high_mortality_factor) {
  # Calculate base mortality probabilities based on age
  base_mortality_prob <- weekly_mortality_risks[population$Age + 1]

  # Adjust mortality probabilities by Seasonal, Health & Extra Factor
  adjusted_mortality_prob <-
    base_mortality_prob * seasonal_factor *
      population$Health^high_mortality_factor

  # Ensure probabilities don't exceed 1
  adjusted_mortality_prob <- pmin(adjusted_mortality_prob, 1)

  # Simulate deaths
  deaths <- rbinom(nrow(population), 1, adjusted_mortality_prob)
  death_ids <- population$ID[deaths == 1]

  # Update population: Remove those who died
  population <- population[!population$ID %in% death_ids, ]

  # Age everyone by 1 week
  population$Age <- population$Age + 1 / weeks_per_year

  # Add newborns equal to the number of deaths
  num_newborns <- length(death_ids)
  if (num_newborns > 0) {
    new_children <- data.frame(
      ID = (max(population$ID) + 1):(max(population$ID) + num_newborns),
      Age = rep(0, num_newborns),
      Health = sample(
        c(1, 2, 4, 8),
        num_newborns,
        replace = TRUE,
        prob = c(0.3, 0.3, 0.3, 0.1)
      )
    )
    population <- rbind(population, new_children)
  }

  # Return updated population and death count
  list(population = population, deaths = length(death_ids))
 }

 # Add a seasonal factor
 get_seasonal_factor <- function(week) {
  1 + 0.5 * sin(2 * pi * (week / weeks_per_year))
 }

 # High mortality event effect profile
 high_mortality_factors <- c(1.1, 1.5, 2, 1.8, 1.5, 1.5, 1.1)
 high_mortality_weeks <- 100:(100 + length(high_mortality_factors) - 1)

 # Simulation loop with seasonal mortality risks and high mortality event
 weekly_deaths <- numeric(simulation_duration_weeks)

 for (week in 1:simulation_duration_weeks) {
  high_mortality_factor <- ifelse(
    week %in% high_mortality_weeks,
    high_mortality_factors[week - min(high_mortality_weeks) + 1],
    1
  )
  # Simulate one week
  result <- simulate_week(
    population,
    seasonal_factor = get_seasonal_factor(week),
    high_mortality_factor = high_mortality_factor
  )
  population <- result$population
  weekly_deaths[week] <- result$deaths

  print(paste0(
    "Week: ", week, "; Deaths: ", weekly_deaths[week],
    "; Population: ", nrow(population)
  ))
 }

 # Prepare data for plotting
 sim_data <- data.frame(
  Week = 1:simulation_duration_weeks,
  Deaths = weekly_deaths
 )

 # Plot weekly deaths with high mortality event
 chart <- ggplot(sim_data, aes(x = Week, y = Deaths)) +
  geom_line(color = "black") +
  labs(
    title = "Weekly Deaths Over 5 Years with High Mortality Event [Simulation]",
    subtitle = paste0(
      "Population: ", formatC(population_size, format = "d", big.mark = ","),
      ", High Mortality Event Starting Week 100"
    ),
    x = "Week",
    y = "Deaths"
  )

 # Save the chart
 ggsave(
  filename = "chart4.png", plot = chart,
  width = 8, height = 4, units = "in", dpi = 300
 )

 # source("/Users/ben/Downloads/1.r")
	# Load necessary libraries
	library(dplyr)
	library(ggplot2)

	sf <- 2
	width <- 600 * sf
	height <- 335 * sf

	options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))


	# Define parameters
	population_size <- 10000
	simulation_weeks <- 52 * 3
	max_age <- 120

	# 1. Use Real world
	# https://www.mortality.watch/explorer/?c=ITA&t=cmr&cs=bar&df=2019&dt=2019
	# &ag=0-9&ag=10-19&ag=20-29&ag=30-39&ag=40-49&ag=50-59&ag=60-69&ag=70-79
	# &ag=80%2B&sb=0&lg=1
	# Define the data
	age_groups <- c(
	"0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+"
	)
	mid_ages <- c(5, 15, 25, 35, 45, 55, 65, 75, 90) # Representative midpoints
	# Deaths per 100,000
	mortality_counts <- c(31, 13, 30, 46, 116, 303, 773, 2125, 9715)

	# Convert mortality counts to rates
	mortality_rates <- mortality_counts / sum(mortality_counts)

	# Create a data frame for ggplot
	data <- data.frame(
	mid_ages = mid_ages,
	mortality_rates = mortality_rates,
	mortality_counts = mortality_counts
	)

	# Perform LOESS smoothing
	loess_fit <- loess(mortality_rates ~ mid_ages, span = 0.62)
	ages <- 0:120
	predicted_risks_loess <- predict(
	loess_fit,
	newdata = data.frame(mid_ages = ages)
	)

	# Create a data frame for the smoothed predictions
	smoothed_data <- data.frame(
	ages = ages,
	predicted_risks_loess = predicted_risks_loess
	)

	# Plot with ggplot2
	chart <- ggplot(data, aes(x = mid_ages, y = mortality_rates)) +
	geom_point(color = "red", size = 3) +
	geom_line(
	data = smoothed_data,
	aes(x = ages, y = predicted_risks_loess), color = "blue", size = 1
	) +
	scale_y_log10(labels = scales::percent_format(scale = 100)) +
	labs(
	title = "Mortality Risk by Age (Loess Fit)",
	x = "Age (Years)",
	y = "Mortality Risk (Log Scale)"
	) +
	theme(
	plot.title = element_text(hjust = 0.5),
	legend.position = "top"
	)

	ggsave(
	filename = "chart1.png", plot = chart,
	width = 8, height = 4, units = "in", dpi = 300
	)


	# 2. Fit a log-linear model, to simplify the mortality rates by age.

	# Fit a log-linear model
	log_mortality_rates <- log(mortality_rates)
	log_model <- lm(log_mortality_rates ~ mid_ages)

	# Extract coefficients
	a <- coef(log_model)[2]
	b <- coef(log_model)[1]

	# Predict using the log-linear model
	ages <- 0:120
	predicted_log_mortality <- predict(
	log_model,
	newdata = data.frame(mid_ages = ages)
	)
	predicted_mortality <- pmin(1, exp(predicted_log_mortality))

	# Create data frames for ggplot
	data <- data.frame(
	mid_ages = mid_ages,
	mortality_rates = mortality_rates
	)

	smoothed_data <- data.frame(
	ages = ages,
	predicted_mortality = predicted_mortality
	)

	# Plot with ggplot2
	chart <- ggplot(data, aes(x = mid_ages, y = mortality_rates)) +
	geom_point(color = "red", size = 3) +
	geom_line(
	data = smoothed_data, aes(x = ages, y = predicted_mortality),
	color = "blue", size = 1
	) +
	scale_y_log10() + # Log scale for y-axis
	labs(
	title = "Mortality Risk by Age (Log-Linear Fit)",
	x = "Age (Years)",
	y = "Mortality Risk (Log Scale)"
	) +
	theme(
	plot.title = element_text(hjust = 0.5),
	legend.position = "none"
	)

	ggsave(
	filename = "chart2.png", plot = chart,
	width = 8, height = 4, units = "in", dpi = 300
	)


	cat(sprintf(
	"Italian mortality risk by age ≈ exp(%.4f) * exp(%.4f * age)\n", b, a
	))


	# 3. Basic Simulation

	# Parameters
	max_age <- 120
	population_size <- 100000
	weeks_per_year <- 52
	simulation_duration_weeks <- 3 * weeks_per_year

	# Convert annual mortality rates to weekly probabilities
	ages <- 0:max_age
	survival_prob <- cumprod(1 - predicted_mortality)
	weekly_mortality_risks <- 1 - (1 - predicted_mortality)^(1 / weeks_per_year)

	# Initialize the population
	set.seed(123) # For reproducibility
	age_distribution <- survival_prob / sum(survival_prob)
	initial_ages <- sample(
	ages,
	size = population_size, replace = TRUE, prob = age_distribution
	)
	population <- data.frame(ID = 1:population_size, Age = initial_ages)

	population \|>
	group_by(Age) \|>
	summarize(n = n()) \|>
	ggplot(aes(x = Age, y = n)) +
	geom_line()


	# Function to simulate one week
	simulate_week <- function(population, weekly_mortality_risks) {
	# Calculate individual mortality probabilities
	individual_mortality_prob <- weekly_mortality_risks[population$Age + 1]

	# Simulate deaths
	deaths <- rbinom(nrow(population), 1, individual_mortality_prob)
	death_ids <- population$ID[deaths == 1]

	# Update population: Remove those who died
	population <- population[!population$ID %in% death_ids, ]

	# Remove individuals at max_age
	population <- population[population$Age < max_age, ]

	# Age everyone by 1 week
	population$Age <- population$Age + 1 / weeks_per_year

	# Add newborns equal to the number of deaths
	num_newborns <- length(death_ids)
	if (num_newborns > 0) {
	new_children <- data.frame(
	ID = (max(population$ID) + 1):(max(population$ID) + num_newborns),
	Age = rep(0, num_newborns)
	)
	population <- rbind(population, new_children)
	}

	# Return updated population and death count
	list(population = population, deaths = length(death_ids))
	}

	# Add a seasonal factor
	seasonal_factor <- function(week) {
	1 + 0.5 * sin(2 * pi * (week / weeks_per_year))
	}

	# Simulation loop with seasonal mortality risks
	weekly_deaths_seasonal <- numeric(simulation_duration_weeks)

	for (week in 1:simulation_duration_weeks) {
	# Apply the seasonal factor to the mortality risks
	adjusted_weekly_risks <- weekly_mortality_risks * seasonal_factor(week)

	# Simulate one week
	result <- simulate_week(population, adjusted_weekly_risks)
	population <- result$population
	weekly_deaths_seasonal[week] <- result$deaths

	print(paste0(
	"Week: ", week, "; Deaths: ", weekly_deaths_seasonal[week],
	"; Population: ", nrow(population)
	))
	}

	# Prepare data for plotting
	simulation_data_seasonal <- data.frame(
	Week = 1:simulation_duration_weeks,
	Deaths = weekly_deaths_seasonal
	)

	# Plot weekly deaths with seasonal adjustment
	chart_seasonal <- ggplot(simulation_data_seasonal, aes(x = Week, y = Deaths)) +
	geom_line(color = "blue") +
	labs(
	title = "Weekly Deaths Over 3 Years (Seasonal Adjustment)",
	x = "Week",
	y = "Number of Deaths"
	)

	# Save the seasonal chart
	ggplot2::ggsave(
	filename = "chart3.png", plot = chart_seasonal,
	width = width, height = height, units = "px", dpi = 72 * sf,
	device = grDevices::png, type = c("cairo")
	)


	# 4. Simulation

	# Parameters
	max_age <- 120
	population_size <- 100000
	weeks_per_year <- 52
	simulation_duration_weeks <- 5 * weeks_per_year

	# Example mortality rates (replace this with actual data)
	predicted_mortality <- seq(0.00001, 0.2, length.out = max_age + 1)

	# Convert annual mortality rates to weekly probabilities
	ages <- 0:max_age
	survival_prob <- cumprod(1 - predicted_mortality)
	weekly_mortality_risks <- 1 - (1 - predicted_mortality)^(1 / weeks_per_year)

	# Initialize the population
	set.seed(123) # For reproducibility
	age_distribution <- survival_prob / sum(survival_prob)
	initial_ages <- sample(
	ages,
	size = population_size, replace = TRUE, prob = age_distribution
	)
	population <- data.frame(
	ID = 1:population_size,
	Age = initial_ages,
	Health = sample(
	c(1, 2, 4, 8),
	population_size,
	replace = TRUE,
	prob = c(0.3, 0.3, 0.3, 0.1)
	)
	)

	population %>%
	group_by(Age) %>%
	summarize(n = n()) %>%
	ggplot(aes(x = Age, y = n)) +
	geom_line() +
	labs(title = "Age Distribution", x = "Age", y = "Count")

	# Function to simulate one week
	simulate_week <- function(population, seasonal_factor, high_mortality_factor) {
	# Calculate base mortality probabilities based on age
	base_mortality_prob <- weekly_mortality_risks[population$Age + 1]

	# Adjust mortality probabilities by Seasonal, Health & Extra Factor
	adjusted_mortality_prob <-
	base_mortality_prob * seasonal_factor *
	population$Health^high_mortality_factor

	# Ensure probabilities don't exceed 1
	adjusted_mortality_prob <- pmin(adjusted_mortality_prob, 1)

	# Simulate deaths
	deaths <- rbinom(nrow(population), 1, adjusted_mortality_prob)
	death_ids <- population$ID[deaths == 1]

	# Update population: Remove those who died
	population <- population[!population$ID %in% death_ids, ]

	# Age everyone by 1 week
	population$Age <- population$Age + 1 / weeks_per_year

	# Add newborns equal to the number of deaths
	num_newborns <- length(death_ids)
	if (num_newborns > 0) {
	new_children <- data.frame(
	ID = (max(population$ID) + 1):(max(population$ID) + num_newborns),
	Age = rep(0, num_newborns),
	Health = sample(
	c(1, 2, 4, 8),
	num_newborns,
	replace = TRUE,
	prob = c(0.3, 0.3, 0.3, 0.1)
	)
	)
	population <- rbind(population, new_children)
	}

	# Return updated population and death count
	list(population = population, deaths = length(death_ids))
	}

	# Add a seasonal factor
	get_seasonal_factor <- function(week) {
	1 + 0.5 * sin(2 * pi * (week / weeks_per_year))
	}

	# High mortality event effect profile
	high_mortality_factors <- c(1.1, 1.5, 2, 1.8, 1.5, 1.5, 1.1)
	high_mortality_weeks <- 100:(100 + length(high_mortality_factors) - 1)

	# Simulation loop with seasonal mortality risks and high mortality event
	weekly_deaths <- numeric(simulation_duration_weeks)

	for (week in 1:simulation_duration_weeks) {
	high_mortality_factor <- ifelse(
	week %in% high_mortality_weeks,
	high_mortality_factors[week - min(high_mortality_weeks) + 1],
	1
	)
	# Simulate one week
	result <- simulate_week(
	population,
	seasonal_factor = get_seasonal_factor(week),
	high_mortality_factor = high_mortality_factor
	)
	population <- result$population
	weekly_deaths[week] <- result$deaths

	print(paste0(
	"Week: ", week, "; Deaths: ", weekly_deaths[week],
	"; Population: ", nrow(population)
	))
	}

	# Prepare data for plotting
	sim_data <- data.frame(
	Week = 1:simulation_duration_weeks,
	Deaths = weekly_deaths
	)

	# Plot weekly deaths with high mortality event
	chart <- ggplot(sim_data, aes(x = Week, y = Deaths)) +
	geom_line(color = "black") +
	labs(
	title = "Weekly Deaths Over 5 Years with High Mortality Event [Simulation]",
	subtitle = paste0(
	"Population: ", formatC(population_size, format = "d", big.mark = ","),
	", High Mortality Event Starting Week 100"
	),
	x = "Week",
	y = "Deaths"
	)

	# Save the chart
	ggsave(
	filename = "chart4.png", plot = chart,
	width = 8, height = 4, units = "in", dpi = 300
	)

	# source("/Users/ben/Downloads/1.r")