Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save USMortality/a3adc590d5f4ed23e0f41fedde91bddf to your computer and use it in GitHub Desktop.
Save USMortality/a3adc590d5f4ed23e0f41fedde91bddf to your computer and use it in GitHub Desktop.
Crude Cancer Incidence & Mortality [USA]
library(readr)
library(tidyr)
library(ggplot2)
library(dplyr)
library(fable)
library(tsibble)
sf <- 2
width <- 900 * sf
height <- 450 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
usa_pop <- read_csv("https://s3.mortality.watch/data/population/usa/20y.csv") |>
filter(iso3c == "USA", age_group == "all") |>
select(year, population)
# Cancer data with male/female breakdown
cancer_data <- tibble(
year = c(2010:2025),
Estimated_New_Cases_Both_Sexes = c(1529560, 1596670, 1638910, 1660290, 1665540, 1658370, 1685210, 1688780, 1735350, 1762450, 1806590, 1898160, 1918030, 1958310, 2001140, 2041910),
Estimated_Deaths_Both_Sexes = c(569490, 571950, 577190, 580350, 585720, 589430, 595690, 600920, 609640, 606880, 606520, 608570, 609360, 609820, 611720, 618120),
Estimated_New_Cases_Male = c(789620, 822300, 848170, 855220, 855440, 841390, 878980, 878500, 910040, 933590, 968490, 1019500, 1030700, 1053360, 1076470, 1099480),
Estimated_New_Cases_Female = c(739940, 774370, 790740, 805070, 810100, 816980, 806230, 810280, 825310, 828860, 838100, 878660, 887330, 904950, 924670, 942430)
)
# Merge with population data and calculate rates
cancer_pop <- cancer_data |>
left_join(usa_pop, by = join_by(year)) |>
mutate(
# Calculate rates per 100,000
incidence = (Estimated_New_Cases_Both_Sexes / population) * 100000,
mortality = (Estimated_Deaths_Both_Sexes / population) * 100000
)
# Function to create cancer trend charts
create_cancer_chart <- function(metric = c("incidence", "mortality")) {
metric <- match.arg(metric)
# Create time series for the selected metric
ts <- cancer_pop |>
select(year, !!sym(metric)) |>
as_tsibble(index = year)
# Fit model on pre-2020 data
model <- ts |>
filter(year <= 2019) |>
model(lm = TSLM(!!sym(metric) ~ trend()))
# Generate forecasts
forecasts <- model |>
forecast(h = "6 years", level = 95) |>
mutate(hl = hilo(!!sym(metric), level = 95)) |>
unpack_hilo(cols = hl) |>
as_tibble() |>
select(year, .mean, hl_lower, hl_upper)
# Compute fitted values for pre-2020 period
fitted_values <- model |>
augment() |>
select(year, .fitted)
# Properly join fitted values and forecasts
ts_augmented <- ts |> left_join(fitted_values, by = c("year"))
ts_with_forecast <- ts_augmented |> left_join(forecasts, by = c("year"))
# Set chart parameters based on metric
if (metric == "incidence") {
color <- "red"
title <- "Crude Cancer Incidence [USA]"
y_label <- "Incidence Rate per 100,000"
} else {
color <- "purple"
title <- "Crude Cancer Mortality [USA]"
y_label <- "Mortality Rate per 100,000"
}
# Create chart
chart <- ggplot(ts_with_forecast, aes(x = year)) +
geom_line(aes(y = !!sym(metric)), color = color, size = 1.2) +
geom_line(aes(y = .fitted), color = "black", linetype = "solid", size = 1) +
geom_line(aes(y = .mean), color = "black", linetype = "solid", size = 1) +
geom_ribbon(aes(ymin = hl_lower, ymax = hl_upper),
fill = "black", alpha = 0.2
) +
# Add baseline period shading
annotate("rect",
xmin = 2010, xmax = 2019, ymin = -Inf, ymax = Inf,
alpha = 0.1, fill = "gray50"
) +
# Add ACS projection points for 2020-2025
geom_point(
data = ts_with_forecast |> filter(year >= 2020),
aes(y = !!sym(metric)),
color = color, shape = 18, size = 3, alpha = 0.8
) +
scale_x_continuous(breaks = seq(2010, 2025, 2)) +
labs(
x = "Year",
y = y_label,
title = title,
subtitle = "Baseline: 2010-2019 | Black line: Linear trend (95% CI)",
caption = "Source: American Cancer Society | Population: mortality.watch"
) +
theme_minimal() +
theme(
plot.background = element_rect(fill = "white", color = NA),
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 10, color = "gray60"),
panel.grid.minor = element_blank()
) +
annotate("text",
x = 2014.5, y = Inf,
label = "Baseline Period", size = 3, color = "gray50",
fontface = "italic", vjust = 1.5
)
return(chart)
}
# Generate and save charts
ggsave(
filename = "chart1.png",
plot = create_cancer_chart("incidence"),
width = width,
height = height,
units = "px",
dpi = 72 * sf,
device = grDevices::png,
type = "cairo"
)
ggsave(
filename = "chart2.png",
plot = create_cancer_chart("mortality"),
width = width,
height = height,
units = "px",
dpi = 72 * sf,
device = grDevices::png,
type = "cairo"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment