Created
May 29, 2025 15:30
-
-
Save USMortality/a3adc590d5f4ed23e0f41fedde91bddf to your computer and use it in GitHub Desktop.
Crude Cancer Incidence & Mortality [USA]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(tidyr) | |
library(ggplot2) | |
library(dplyr) | |
library(fable) | |
library(tsibble) | |
sf <- 2 | |
width <- 900 * sf | |
height <- 450 * sf | |
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf)) | |
usa_pop <- read_csv("https://s3.mortality.watch/data/population/usa/20y.csv") |> | |
filter(iso3c == "USA", age_group == "all") |> | |
select(year, population) | |
# Cancer data with male/female breakdown | |
cancer_data <- tibble( | |
year = c(2010:2025), | |
Estimated_New_Cases_Both_Sexes = c(1529560, 1596670, 1638910, 1660290, 1665540, 1658370, 1685210, 1688780, 1735350, 1762450, 1806590, 1898160, 1918030, 1958310, 2001140, 2041910), | |
Estimated_Deaths_Both_Sexes = c(569490, 571950, 577190, 580350, 585720, 589430, 595690, 600920, 609640, 606880, 606520, 608570, 609360, 609820, 611720, 618120), | |
Estimated_New_Cases_Male = c(789620, 822300, 848170, 855220, 855440, 841390, 878980, 878500, 910040, 933590, 968490, 1019500, 1030700, 1053360, 1076470, 1099480), | |
Estimated_New_Cases_Female = c(739940, 774370, 790740, 805070, 810100, 816980, 806230, 810280, 825310, 828860, 838100, 878660, 887330, 904950, 924670, 942430) | |
) | |
# Merge with population data and calculate rates | |
cancer_pop <- cancer_data |> | |
left_join(usa_pop, by = join_by(year)) |> | |
mutate( | |
# Calculate rates per 100,000 | |
incidence = (Estimated_New_Cases_Both_Sexes / population) * 100000, | |
mortality = (Estimated_Deaths_Both_Sexes / population) * 100000 | |
) | |
# Function to create cancer trend charts | |
create_cancer_chart <- function(metric = c("incidence", "mortality")) { | |
metric <- match.arg(metric) | |
# Create time series for the selected metric | |
ts <- cancer_pop |> | |
select(year, !!sym(metric)) |> | |
as_tsibble(index = year) | |
# Fit model on pre-2020 data | |
model <- ts |> | |
filter(year <= 2019) |> | |
model(lm = TSLM(!!sym(metric) ~ trend())) | |
# Generate forecasts | |
forecasts <- model |> | |
forecast(h = "6 years", level = 95) |> | |
mutate(hl = hilo(!!sym(metric), level = 95)) |> | |
unpack_hilo(cols = hl) |> | |
as_tibble() |> | |
select(year, .mean, hl_lower, hl_upper) | |
# Compute fitted values for pre-2020 period | |
fitted_values <- model |> | |
augment() |> | |
select(year, .fitted) | |
# Properly join fitted values and forecasts | |
ts_augmented <- ts |> left_join(fitted_values, by = c("year")) | |
ts_with_forecast <- ts_augmented |> left_join(forecasts, by = c("year")) | |
# Set chart parameters based on metric | |
if (metric == "incidence") { | |
color <- "red" | |
title <- "Crude Cancer Incidence [USA]" | |
y_label <- "Incidence Rate per 100,000" | |
} else { | |
color <- "purple" | |
title <- "Crude Cancer Mortality [USA]" | |
y_label <- "Mortality Rate per 100,000" | |
} | |
# Create chart | |
chart <- ggplot(ts_with_forecast, aes(x = year)) + | |
geom_line(aes(y = !!sym(metric)), color = color, size = 1.2) + | |
geom_line(aes(y = .fitted), color = "black", linetype = "solid", size = 1) + | |
geom_line(aes(y = .mean), color = "black", linetype = "solid", size = 1) + | |
geom_ribbon(aes(ymin = hl_lower, ymax = hl_upper), | |
fill = "black", alpha = 0.2 | |
) + | |
# Add baseline period shading | |
annotate("rect", | |
xmin = 2010, xmax = 2019, ymin = -Inf, ymax = Inf, | |
alpha = 0.1, fill = "gray50" | |
) + | |
# Add ACS projection points for 2020-2025 | |
geom_point( | |
data = ts_with_forecast |> filter(year >= 2020), | |
aes(y = !!sym(metric)), | |
color = color, shape = 18, size = 3, alpha = 0.8 | |
) + | |
scale_x_continuous(breaks = seq(2010, 2025, 2)) + | |
labs( | |
x = "Year", | |
y = y_label, | |
title = title, | |
subtitle = "Baseline: 2010-2019 | Black line: Linear trend (95% CI)", | |
caption = "Source: American Cancer Society | Population: mortality.watch" | |
) + | |
theme_minimal() + | |
theme( | |
plot.background = element_rect(fill = "white", color = NA), | |
axis.text.x = element_text(angle = 45, hjust = 1), | |
plot.title = element_text(size = 14, face = "bold"), | |
plot.subtitle = element_text(size = 10, color = "gray60"), | |
panel.grid.minor = element_blank() | |
) + | |
annotate("text", | |
x = 2014.5, y = Inf, | |
label = "Baseline Period", size = 3, color = "gray50", | |
fontface = "italic", vjust = 1.5 | |
) | |
return(chart) | |
} | |
# Generate and save charts | |
ggsave( | |
filename = "chart1.png", | |
plot = create_cancer_chart("incidence"), | |
width = width, | |
height = height, | |
units = "px", | |
dpi = 72 * sf, | |
device = grDevices::png, | |
type = "cairo" | |
) | |
ggsave( | |
filename = "chart2.png", | |
plot = create_cancer_chart("mortality"), | |
width = width, | |
height = height, | |
units = "px", | |
dpi = 72 * sf, | |
device = grDevices::png, | |
type = "cairo" | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment