Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save USMortality/97a0dde1d82056d4425cb1b6452a58e2 to your computer and use it in GitHub Desktop.

Select an option

Save USMortality/97a0dde1d82056d4425cb1b6452a58e2 to your computer and use it in GitHub Desktop.
Age-Standardized & Crude Cancer Deaths by Month & 5y Age Group [USA]
library(readr)
library(tidyr)
library(ggplot2)
library(dplyr)
library(fable)
library(tsibble)
sf <- 2
width <- 900 * sf
height <- 450 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
# Load population data
pop <- read_csv(
"https://s3.mortality.watch/data/population/usa/5y.csv",
col_types = "ccciil"
) |>
filter(iso3c == "USA")
# Function to load datasets
load_data <- function(url) {
read_delim(url, delim = "\t", col_types = cols(.default = "c")) |>
as_tibble() |>
select(`Month Code`, `Five-Year Age Groups Code`, `Deaths`) |>
setNames(c("date", "age_group", "deaths")) |>
mutate(
year = as.integer(substr(date, 1, 4)),
date = make_yearmonth(year = year, month = substr(date, 6, 7)),
deaths = as.integer(deaths)
)
}
# Prepare time series data
prepare_ts <- function(df1, df2) {
rbind(df1, df2) |>
arrange(age_group, date) |>
mutate(
age_group = case_when(
age_group == "1" ~ "0-4",
age_group == "1-4" ~ "0-4",
age_group == "85-89" ~ "85+",
age_group == "90-94" ~ "85+",
age_group == "95-99" ~ "85+",
age_group == "100+" ~ "85+",
.default = age_group
)
) |>
group_by(year, date, age_group) |>
summarize(deaths = sum(deaths), .groups = "drop") |>
inner_join(pop, by = join_by("year", "age_group")) |>
mutate(
age_group = factor(age_group, levels = c(
"0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34",
"35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69",
"70-74", "75-79", "80-84", "85+"
)),
mortality = deaths / population * 100000
) |>
as_tsibble(index = date, key = age_group) |>
filter(year >= 2017)
}
# Function to generate and save charts
generate_chart_facet <- function(ts, type, filename, d_type, chart_type = "line") {
model <- ts |>
filter(year >= 2017 & year <= 2019) |>
model(lm = TSLM(mortality ~ season()))
forecasts <- model |>
forecast(h = "5 years", level = 95) |>
mutate(hl = hilo(mortality, level = 95)) |>
unpack_hilo(cols = hl) |>
as_tibble() |>
select(date, age_group, .mean, hl_lower, hl_upper)
# Compute fitted values for pre-2020 period
fitted_values <- model |>
augment() |>
select(date, age_group, .fitted)
# Properly join fitted values
ts_augmented <- ts |>
left_join(fitted_values, by = c("date", "age_group"))
ts_with_forecast <- ts_augmented |>
left_join(forecasts, by = c("date", "age_group")) |>
filter(date <= max(ts$date, na.rm = TRUE) - 2)
if (type == "fitted") {
chart <- ggplot(ts_with_forecast, aes(x = date)) +
geom_line(aes(y = mortality), color = "red") +
geom_line(aes(y = .fitted), color = "black", linetype = "solid") +
geom_line(aes(y = .mean), color = "black", linetype = "solid") +
geom_ribbon(aes(ymin = hl_lower, ymax = hl_upper),
fill = "black", alpha = 0.2
) +
labs(
x = "Year", y = "Mortality",
title = paste0(
"Cancer (", d_type, ") Crude Mortality Rate by Age Group [USA]"
),
subtitle = "Baseline: Mean 2017-2019 · Source: CDC Wonder"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(vars(age_group), scales = "free")
} else if (chart_type == "bar") {
excess <- ts_with_forecast |>
filter(year >= 2017) |>
mutate(excess = mortality / (ifelse(is.na(.mean), .fitted, .mean)) - 1)
chart <- ggplot(excess, aes(x = date)) +
geom_col(aes(y = excess), fill = "red") +
labs(
x = "Year", y = "Excess Mortality",
title = paste0(
"Relative Cancer (", d_type, ") Excess Mortality by Age Group [USA]"
),
subtitle = "Baseline: Mean 2017-2019 · Source: CDC Wonder"
) +
scale_y_continuous(labels = scales::percent_format()) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(vars(age_group), scales = "free")
}
ggsave(
filename = filename,
plot = chart,
width = width,
height = height,
units = "px",
dpi = 72 * sf,
device = grDevices::png,
type = "cairo"
)
}
generate_chart <- function(ts, type, filename, d_type, chart_type = "line") {
model <- ts |>
filter(year >= 2017 & year <= 2019) |>
model(lm = TSLM(mortality ~ season()))
forecasts <- model |>
forecast(h = "5 years", level = 95) |>
mutate(hl = hilo(mortality, level = 95)) |>
unpack_hilo(cols = hl) |>
as_tibble() |>
select(date, .mean, hl_lower, hl_upper)
# Compute fitted values for pre-2020 period
fitted_values <- model |>
augment() |>
select(date, .fitted)
# Properly join fitted values
ts_augmented <- ts |>
left_join(fitted_values, by = c("date"))
ts_with_forecast <- ts_augmented |>
left_join(forecasts, by = c("date")) |>
filter(date <= max(ts$date, na.rm = TRUE) - 2)
if (type == "fitted") {
chart <- ggplot(ts_with_forecast, aes(x = date)) +
geom_line(aes(y = mortality), color = "red") +
geom_line(aes(y = .fitted), color = "black", linetype = "dashed") +
geom_line(aes(y = .mean), color = "black", linetype = "dashed") +
geom_ribbon(aes(ymin = hl_lower, ymax = hl_upper),
fill = "black", alpha = 0.2
) +
labs(
x = "Year", y = "Mortality",
title = paste0(
"Cancer (", d_type, ") Age-Standardized Mortality Rate [USA]"
),
subtitle = "Baseline: Mean 2017-2019 · Source: CDC Wonder"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
} else if (chart_type == "bar") {
excess <- ts_with_forecast |>
filter(year >= 2017) |>
mutate(excess = mortality / (ifelse(is.na(.mean), .fitted, .mean)) - 1)
chart <- ggplot(excess, aes(x = date)) +
geom_col(aes(y = excess), fill = "red") +
labs(
x = "Year", y = "Excess Mortality",
title = paste0(
"Relative Cancer (", d_type, ") Excess Age-Standardized Mortality Rate [USA]"
),
subtitle = "Baseline: Mean 2017-2019 · Source: CDC Wonder"
) +
scale_y_continuous(labels = scales::percent_format()) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
ggsave(
filename = filename,
plot = chart,
width = width,
height = height,
units = "px",
dpi = 72 * sf,
device = grDevices::png,
type = "cairo"
)
}
# Age Standardized Rates
source(paste0(
"https://gist.githubusercontent.com/USMortality/",
"cbb314e8e29b2f5b5578483504dd0d9f/raw/standard_population.r"
))
# UCOD
df1 <- load_data(
"https://apify.mortality.watch/cdc-wonder/month-5y-ucd-neoplasm.txt"
)
df2 <- load_data(paste0(
"https://apify.mortality.watch/cdc-wonder/",
"provisional-month-5y-ucd-neoplasm.txt"
)) |> filter(year > max(df1$year, na.rm = TRUE))
ts <- prepare_ts(df1, df2)
generate_chart_facet(ts, "fitted", "chart1.png", "UCD", "line")
generate_chart_facet(
ts, "excess", "chart2.png", "UCD", "bar"
)
# Age Standardized
age_groups <- ts |> filter(!age_group %in% c("all", "NS", NA))
std_pop <- get_usa2000_bins(unique(age_groups$age_group))
ts <- ts |>
inner_join(std_pop) |>
mutate(mortality = mortality * weight) |>
as_tibble() |>
group_by(date, year) |>
summarize(mortality = sum(mortality, na.rm = TRUE)) |>
ungroup() |>
as_tsibble(index = date)
generate_chart(ts, "fitted", "chart3.png", "UCD", "line")
# MCD
df1 <- load_data(
"https://apify.mortality.watch/cdc-wonder/month-5y-mcd-neoplasm.txt"
)
df2 <- load_data(paste0(
"https://apify.mortality.watch/cdc-wonder/",
"provisional-month-5y-mcd-neoplasm.txt"
)) |> filter(year > max(df1$year, na.rm = TRUE))
ts <- prepare_ts(df1, df2)
generate_chart_facet(ts, "fitted", "chart4.png", "MCD", "line")
generate_chart_facet(
ts, "excess", "chart5.png", "MCD", "bar"
)
# Age Standardized
age_groups <- ts |> filter(!age_group %in% c("all", "NS", NA))
std_pop <- get_usa2000_bins(unique(age_groups$age_group))
ts <- ts |>
inner_join(std_pop) |>
mutate(mortality = mortality * weight) |>
as_tibble() |>
group_by(date, year) |>
summarize(mortality = sum(mortality, na.rm = TRUE)) |>
ungroup() |>
as_tsibble(index = date)
generate_chart(ts, "fitted", "chart6.png", "MCD", "line")
@USMortality

Copy link
Copy Markdown
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment