Created
November 24, 2024 01:55
-
-
Save USMortality/8adad5b3b8eb84a2bcd3b43cf1e93c7c to your computer and use it in GitHub Desktop.
Pull Forward Effect Simulation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load necessary libraries | |
library(dplyr) | |
library(ggplot2) | |
sf <- 2 | |
width <- 600 * sf | |
height <- 335 * sf | |
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf)) | |
# Define parameters | |
population_size <- 10000 | |
simulation_weeks <- 52 * 3 | |
max_age <- 120 | |
# 1. Use Real world | |
# https://www.mortality.watch/explorer/?c=ITA&t=cmr&cs=bar&df=2019&dt=2019 | |
# &ag=0-9&ag=10-19&ag=20-29&ag=30-39&ag=40-49&ag=50-59&ag=60-69&ag=70-79 | |
# &ag=80%2B&sb=0&lg=1 | |
# Define the data | |
age_groups <- c( | |
"0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+" | |
) | |
mid_ages <- c(5, 15, 25, 35, 45, 55, 65, 75, 90) # Representative midpoints | |
# Deaths per 100,000 | |
mortality_counts <- c(31, 13, 30, 46, 116, 303, 773, 2125, 9715) | |
# Convert mortality counts to rates | |
mortality_rates <- mortality_counts / sum(mortality_counts) | |
# Create a data frame for ggplot | |
data <- data.frame( | |
mid_ages = mid_ages, | |
mortality_rates = mortality_rates, | |
mortality_counts = mortality_counts | |
) | |
# Perform LOESS smoothing | |
loess_fit <- loess(mortality_rates ~ mid_ages, span = 0.62) | |
ages <- 0:120 | |
predicted_risks_loess <- predict( | |
loess_fit, | |
newdata = data.frame(mid_ages = ages) | |
) | |
# Create a data frame for the smoothed predictions | |
smoothed_data <- data.frame( | |
ages = ages, | |
predicted_risks_loess = predicted_risks_loess | |
) | |
# Plot with ggplot2 | |
chart <- ggplot(data, aes(x = mid_ages, y = mortality_rates)) + | |
geom_point(color = "red", size = 3) + | |
geom_line( | |
data = smoothed_data, | |
aes(x = ages, y = predicted_risks_loess), color = "blue", size = 1 | |
) + | |
scale_y_log10(labels = scales::percent_format(scale = 100)) + | |
labs( | |
title = "Mortality Risk by Age (Loess Fit)", | |
x = "Age (Years)", | |
y = "Mortality Risk (Log Scale)" | |
) + | |
theme( | |
plot.title = element_text(hjust = 0.5), | |
legend.position = "top" | |
) | |
ggsave( | |
filename = "chart1.png", plot = chart, | |
width = 8, height = 4, units = "in", dpi = 300 | |
) | |
# 2. Fit a log-linear model, to simplify the mortality rates by age. | |
# Fit a log-linear model | |
log_mortality_rates <- log(mortality_rates) | |
log_model <- lm(log_mortality_rates ~ mid_ages) | |
# Extract coefficients | |
a <- coef(log_model)[2] | |
b <- coef(log_model)[1] | |
# Predict using the log-linear model | |
ages <- 0:120 | |
predicted_log_mortality <- predict( | |
log_model, | |
newdata = data.frame(mid_ages = ages) | |
) | |
predicted_mortality <- pmin(1, exp(predicted_log_mortality)) | |
# Create data frames for ggplot | |
data <- data.frame( | |
mid_ages = mid_ages, | |
mortality_rates = mortality_rates | |
) | |
smoothed_data <- data.frame( | |
ages = ages, | |
predicted_mortality = predicted_mortality | |
) | |
# Plot with ggplot2 | |
chart <- ggplot(data, aes(x = mid_ages, y = mortality_rates)) + | |
geom_point(color = "red", size = 3) + | |
geom_line( | |
data = smoothed_data, aes(x = ages, y = predicted_mortality), | |
color = "blue", size = 1 | |
) + | |
scale_y_log10() + # Log scale for y-axis | |
labs( | |
title = "Mortality Risk by Age (Log-Linear Fit)", | |
x = "Age (Years)", | |
y = "Mortality Risk (Log Scale)" | |
) + | |
theme( | |
plot.title = element_text(hjust = 0.5), | |
legend.position = "none" | |
) | |
ggsave( | |
filename = "chart2.png", plot = chart, | |
width = 8, height = 4, units = "in", dpi = 300 | |
) | |
cat(sprintf( | |
"Italian mortality risk by age ≈ exp(%.4f) * exp(%.4f * age)\n", b, a | |
)) | |
# 3. Basic Simulation | |
# Parameters | |
max_age <- 120 | |
population_size <- 100000 | |
weeks_per_year <- 52 | |
simulation_duration_weeks <- 3 * weeks_per_year | |
# Convert annual mortality rates to weekly probabilities | |
ages <- 0:max_age | |
survival_prob <- cumprod(1 - predicted_mortality) | |
weekly_mortality_risks <- 1 - (1 - predicted_mortality)^(1 / weeks_per_year) | |
# Initialize the population | |
set.seed(123) # For reproducibility | |
age_distribution <- survival_prob / sum(survival_prob) | |
initial_ages <- sample( | |
ages, | |
size = population_size, replace = TRUE, prob = age_distribution | |
) | |
population <- data.frame(ID = 1:population_size, Age = initial_ages) | |
population |> | |
group_by(Age) |> | |
summarize(n = n()) |> | |
ggplot(aes(x = Age, y = n)) + | |
geom_line() | |
# Function to simulate one week | |
simulate_week <- function(population, weekly_mortality_risks) { | |
# Calculate individual mortality probabilities | |
individual_mortality_prob <- weekly_mortality_risks[population$Age + 1] | |
# Simulate deaths | |
deaths <- rbinom(nrow(population), 1, individual_mortality_prob) | |
death_ids <- population$ID[deaths == 1] | |
# Update population: Remove those who died | |
population <- population[!population$ID %in% death_ids, ] | |
# Remove individuals at max_age | |
population <- population[population$Age < max_age, ] | |
# Age everyone by 1 week | |
population$Age <- population$Age + 1 / weeks_per_year | |
# Add newborns equal to the number of deaths | |
num_newborns <- length(death_ids) | |
if (num_newborns > 0) { | |
new_children <- data.frame( | |
ID = (max(population$ID) + 1):(max(population$ID) + num_newborns), | |
Age = rep(0, num_newborns) | |
) | |
population <- rbind(population, new_children) | |
} | |
# Return updated population and death count | |
list(population = population, deaths = length(death_ids)) | |
} | |
# Add a seasonal factor | |
seasonal_factor <- function(week) { | |
1 + 0.5 * sin(2 * pi * (week / weeks_per_year)) | |
} | |
# Simulation loop with seasonal mortality risks | |
weekly_deaths_seasonal <- numeric(simulation_duration_weeks) | |
for (week in 1:simulation_duration_weeks) { | |
# Apply the seasonal factor to the mortality risks | |
adjusted_weekly_risks <- weekly_mortality_risks * seasonal_factor(week) | |
# Simulate one week | |
result <- simulate_week(population, adjusted_weekly_risks) | |
population <- result$population | |
weekly_deaths_seasonal[week] <- result$deaths | |
print(paste0( | |
"Week: ", week, "; Deaths: ", weekly_deaths_seasonal[week], | |
"; Population: ", nrow(population) | |
)) | |
} | |
# Prepare data for plotting | |
simulation_data_seasonal <- data.frame( | |
Week = 1:simulation_duration_weeks, | |
Deaths = weekly_deaths_seasonal | |
) | |
# Plot weekly deaths with seasonal adjustment | |
chart_seasonal <- ggplot(simulation_data_seasonal, aes(x = Week, y = Deaths)) + | |
geom_line(color = "blue") + | |
labs( | |
title = "Weekly Deaths Over 3 Years (Seasonal Adjustment)", | |
x = "Week", | |
y = "Number of Deaths" | |
) | |
# Save the seasonal chart | |
ggplot2::ggsave( | |
filename = "chart3.png", plot = chart_seasonal, | |
width = width, height = height, units = "px", dpi = 72 * sf, | |
device = grDevices::png, type = c("cairo") | |
) | |
# 4. Simulation | |
# Parameters | |
max_age <- 120 | |
population_size <- 100000 | |
weeks_per_year <- 52 | |
simulation_duration_weeks <- 5 * weeks_per_year | |
# Example mortality rates (replace this with actual data) | |
predicted_mortality <- seq(0.00001, 0.2, length.out = max_age + 1) | |
# Convert annual mortality rates to weekly probabilities | |
ages <- 0:max_age | |
survival_prob <- cumprod(1 - predicted_mortality) | |
weekly_mortality_risks <- 1 - (1 - predicted_mortality)^(1 / weeks_per_year) | |
# Initialize the population | |
set.seed(123) # For reproducibility | |
age_distribution <- survival_prob / sum(survival_prob) | |
initial_ages <- sample( | |
ages, | |
size = population_size, replace = TRUE, prob = age_distribution | |
) | |
population <- data.frame( | |
ID = 1:population_size, | |
Age = initial_ages, | |
Health = sample( | |
c(1, 2, 4, 8), | |
population_size, | |
replace = TRUE, | |
prob = c(0.3, 0.3, 0.3, 0.1) | |
) | |
) | |
population %>% | |
group_by(Age) %>% | |
summarize(n = n()) %>% | |
ggplot(aes(x = Age, y = n)) + | |
geom_line() + | |
labs(title = "Age Distribution", x = "Age", y = "Count") | |
# Function to simulate one week | |
simulate_week <- function(population, seasonal_factor, high_mortality_factor) { | |
# Calculate base mortality probabilities based on age | |
base_mortality_prob <- weekly_mortality_risks[population$Age + 1] | |
# Adjust mortality probabilities by Seasonal, Health & Extra Factor | |
adjusted_mortality_prob <- | |
base_mortality_prob * seasonal_factor * | |
population$Health^high_mortality_factor | |
# Ensure probabilities don't exceed 1 | |
adjusted_mortality_prob <- pmin(adjusted_mortality_prob, 1) | |
# Simulate deaths | |
deaths <- rbinom(nrow(population), 1, adjusted_mortality_prob) | |
death_ids <- population$ID[deaths == 1] | |
# Update population: Remove those who died | |
population <- population[!population$ID %in% death_ids, ] | |
# Age everyone by 1 week | |
population$Age <- population$Age + 1 / weeks_per_year | |
# Add newborns equal to the number of deaths | |
num_newborns <- length(death_ids) | |
if (num_newborns > 0) { | |
new_children <- data.frame( | |
ID = (max(population$ID) + 1):(max(population$ID) + num_newborns), | |
Age = rep(0, num_newborns), | |
Health = sample( | |
c(1, 2, 4, 8), | |
num_newborns, | |
replace = TRUE, | |
prob = c(0.3, 0.3, 0.3, 0.1) | |
) | |
) | |
population <- rbind(population, new_children) | |
} | |
# Return updated population and death count | |
list(population = population, deaths = length(death_ids)) | |
} | |
# Add a seasonal factor | |
get_seasonal_factor <- function(week) { | |
1 + 0.5 * sin(2 * pi * (week / weeks_per_year)) | |
} | |
# High mortality event effect profile | |
high_mortality_factors <- c(1.1, 1.5, 2, 1.8, 1.5, 1.5, 1.1) | |
high_mortality_weeks <- 100:(100 + length(high_mortality_factors) - 1) | |
# Simulation loop with seasonal mortality risks and high mortality event | |
weekly_deaths <- numeric(simulation_duration_weeks) | |
for (week in 1:simulation_duration_weeks) { | |
high_mortality_factor <- ifelse( | |
week %in% high_mortality_weeks, | |
high_mortality_factors[week - min(high_mortality_weeks) + 1], | |
1 | |
) | |
# Simulate one week | |
result <- simulate_week( | |
population, | |
seasonal_factor = get_seasonal_factor(week), | |
high_mortality_factor = high_mortality_factor | |
) | |
population <- result$population | |
weekly_deaths[week] <- result$deaths | |
print(paste0( | |
"Week: ", week, "; Deaths: ", weekly_deaths[week], | |
"; Population: ", nrow(population) | |
)) | |
} | |
# Prepare data for plotting | |
sim_data <- data.frame( | |
Week = 1:simulation_duration_weeks, | |
Deaths = weekly_deaths | |
) | |
# Plot weekly deaths with high mortality event | |
chart <- ggplot(sim_data, aes(x = Week, y = Deaths)) + | |
geom_line(color = "black") + | |
labs( | |
title = "Weekly Deaths Over 5 Years with High Mortality Event [Simulation]", | |
subtitle = paste0( | |
"Population: ", formatC(population_size, format = "d", big.mark = ","), | |
", High Mortality Event Starting Week 100" | |
), | |
x = "Week", | |
y = "Deaths" | |
) | |
# Save the chart | |
ggsave( | |
filename = "chart4.png", plot = chart, | |
width = 8, height = 4, units = "in", dpi = 300 | |
) | |
# source("/Users/ben/Downloads/1.r") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment