Last active
March 13, 2025 18:15
-
-
Save USMortality/97a0dde1d82056d4425cb1b6452a58e2 to your computer and use it in GitHub Desktop.
Age-Standardized & Crude Cancer Deaths by Month & 5y Age Group [USA]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(readr) | |
| library(tidyr) | |
| library(ggplot2) | |
| library(dplyr) | |
| library(fable) | |
| library(tsibble) | |
| sf <- 2 | |
| width <- 900 * sf | |
| height <- 450 * sf | |
| options(vsc.dev.args = list(width = width, height = height, res = 72 * sf)) | |
| # Load population data | |
| pop <- read_csv( | |
| "https://s3.mortality.watch/data/population/usa/5y.csv", | |
| col_types = "ccciil" | |
| ) |> | |
| filter(iso3c == "USA") | |
| # Function to load datasets | |
| load_data <- function(url) { | |
| read_delim(url, delim = "\t", col_types = cols(.default = "c")) |> | |
| as_tibble() |> | |
| select(`Month Code`, `Five-Year Age Groups Code`, `Deaths`) |> | |
| setNames(c("date", "age_group", "deaths")) |> | |
| mutate( | |
| year = as.integer(substr(date, 1, 4)), | |
| date = make_yearmonth(year = year, month = substr(date, 6, 7)), | |
| deaths = as.integer(deaths) | |
| ) | |
| } | |
| # Prepare time series data | |
| prepare_ts <- function(df1, df2) { | |
| rbind(df1, df2) |> | |
| arrange(age_group, date) |> | |
| mutate( | |
| age_group = case_when( | |
| age_group == "1" ~ "0-4", | |
| age_group == "1-4" ~ "0-4", | |
| age_group == "85-89" ~ "85+", | |
| age_group == "90-94" ~ "85+", | |
| age_group == "95-99" ~ "85+", | |
| age_group == "100+" ~ "85+", | |
| .default = age_group | |
| ) | |
| ) |> | |
| group_by(year, date, age_group) |> | |
| summarize(deaths = sum(deaths), .groups = "drop") |> | |
| inner_join(pop, by = join_by("year", "age_group")) |> | |
| mutate( | |
| age_group = factor(age_group, levels = c( | |
| "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", | |
| "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", | |
| "70-74", "75-79", "80-84", "85+" | |
| )), | |
| mortality = deaths / population * 100000 | |
| ) |> | |
| as_tsibble(index = date, key = age_group) |> | |
| filter(year >= 2017) | |
| } | |
| # Function to generate and save charts | |
| generate_chart_facet <- function(ts, type, filename, d_type, chart_type = "line") { | |
| model <- ts |> | |
| filter(year >= 2017 & year <= 2019) |> | |
| model(lm = TSLM(mortality ~ season())) | |
| forecasts <- model |> | |
| forecast(h = "5 years", level = 95) |> | |
| mutate(hl = hilo(mortality, level = 95)) |> | |
| unpack_hilo(cols = hl) |> | |
| as_tibble() |> | |
| select(date, age_group, .mean, hl_lower, hl_upper) | |
| # Compute fitted values for pre-2020 period | |
| fitted_values <- model |> | |
| augment() |> | |
| select(date, age_group, .fitted) | |
| # Properly join fitted values | |
| ts_augmented <- ts |> | |
| left_join(fitted_values, by = c("date", "age_group")) | |
| ts_with_forecast <- ts_augmented |> | |
| left_join(forecasts, by = c("date", "age_group")) |> | |
| filter(date <= max(ts$date, na.rm = TRUE) - 2) | |
| if (type == "fitted") { | |
| chart <- ggplot(ts_with_forecast, aes(x = date)) + | |
| geom_line(aes(y = mortality), color = "red") + | |
| geom_line(aes(y = .fitted), color = "black", linetype = "solid") + | |
| geom_line(aes(y = .mean), color = "black", linetype = "solid") + | |
| geom_ribbon(aes(ymin = hl_lower, ymax = hl_upper), | |
| fill = "black", alpha = 0.2 | |
| ) + | |
| labs( | |
| x = "Year", y = "Mortality", | |
| title = paste0( | |
| "Cancer (", d_type, ") Crude Mortality Rate by Age Group [USA]" | |
| ), | |
| subtitle = "Baseline: Mean 2017-2019 · Source: CDC Wonder" | |
| ) + | |
| theme(axis.text.x = element_text(angle = 45, hjust = 1)) + | |
| facet_wrap(vars(age_group), scales = "free") | |
| } else if (chart_type == "bar") { | |
| excess <- ts_with_forecast |> | |
| filter(year >= 2017) |> | |
| mutate(excess = mortality / (ifelse(is.na(.mean), .fitted, .mean)) - 1) | |
| chart <- ggplot(excess, aes(x = date)) + | |
| geom_col(aes(y = excess), fill = "red") + | |
| labs( | |
| x = "Year", y = "Excess Mortality", | |
| title = paste0( | |
| "Relative Cancer (", d_type, ") Excess Mortality by Age Group [USA]" | |
| ), | |
| subtitle = "Baseline: Mean 2017-2019 · Source: CDC Wonder" | |
| ) + | |
| scale_y_continuous(labels = scales::percent_format()) + | |
| theme(axis.text.x = element_text(angle = 45, hjust = 1)) + | |
| facet_wrap(vars(age_group), scales = "free") | |
| } | |
| ggsave( | |
| filename = filename, | |
| plot = chart, | |
| width = width, | |
| height = height, | |
| units = "px", | |
| dpi = 72 * sf, | |
| device = grDevices::png, | |
| type = "cairo" | |
| ) | |
| } | |
| generate_chart <- function(ts, type, filename, d_type, chart_type = "line") { | |
| model <- ts |> | |
| filter(year >= 2017 & year <= 2019) |> | |
| model(lm = TSLM(mortality ~ season())) | |
| forecasts <- model |> | |
| forecast(h = "5 years", level = 95) |> | |
| mutate(hl = hilo(mortality, level = 95)) |> | |
| unpack_hilo(cols = hl) |> | |
| as_tibble() |> | |
| select(date, .mean, hl_lower, hl_upper) | |
| # Compute fitted values for pre-2020 period | |
| fitted_values <- model |> | |
| augment() |> | |
| select(date, .fitted) | |
| # Properly join fitted values | |
| ts_augmented <- ts |> | |
| left_join(fitted_values, by = c("date")) | |
| ts_with_forecast <- ts_augmented |> | |
| left_join(forecasts, by = c("date")) |> | |
| filter(date <= max(ts$date, na.rm = TRUE) - 2) | |
| if (type == "fitted") { | |
| chart <- ggplot(ts_with_forecast, aes(x = date)) + | |
| geom_line(aes(y = mortality), color = "red") + | |
| geom_line(aes(y = .fitted), color = "black", linetype = "dashed") + | |
| geom_line(aes(y = .mean), color = "black", linetype = "dashed") + | |
| geom_ribbon(aes(ymin = hl_lower, ymax = hl_upper), | |
| fill = "black", alpha = 0.2 | |
| ) + | |
| labs( | |
| x = "Year", y = "Mortality", | |
| title = paste0( | |
| "Cancer (", d_type, ") Age-Standardized Mortality Rate [USA]" | |
| ), | |
| subtitle = "Baseline: Mean 2017-2019 · Source: CDC Wonder" | |
| ) + | |
| theme(axis.text.x = element_text(angle = 45, hjust = 1)) | |
| } else if (chart_type == "bar") { | |
| excess <- ts_with_forecast |> | |
| filter(year >= 2017) |> | |
| mutate(excess = mortality / (ifelse(is.na(.mean), .fitted, .mean)) - 1) | |
| chart <- ggplot(excess, aes(x = date)) + | |
| geom_col(aes(y = excess), fill = "red") + | |
| labs( | |
| x = "Year", y = "Excess Mortality", | |
| title = paste0( | |
| "Relative Cancer (", d_type, ") Excess Age-Standardized Mortality Rate [USA]" | |
| ), | |
| subtitle = "Baseline: Mean 2017-2019 · Source: CDC Wonder" | |
| ) + | |
| scale_y_continuous(labels = scales::percent_format()) + | |
| theme(axis.text.x = element_text(angle = 45, hjust = 1)) | |
| } | |
| ggsave( | |
| filename = filename, | |
| plot = chart, | |
| width = width, | |
| height = height, | |
| units = "px", | |
| dpi = 72 * sf, | |
| device = grDevices::png, | |
| type = "cairo" | |
| ) | |
| } | |
| # Age Standardized Rates | |
| source(paste0( | |
| "https://gist.githubusercontent.com/USMortality/", | |
| "cbb314e8e29b2f5b5578483504dd0d9f/raw/standard_population.r" | |
| )) | |
| # UCOD | |
| df1 <- load_data( | |
| "https://apify.mortality.watch/cdc-wonder/month-5y-ucd-neoplasm.txt" | |
| ) | |
| df2 <- load_data(paste0( | |
| "https://apify.mortality.watch/cdc-wonder/", | |
| "provisional-month-5y-ucd-neoplasm.txt" | |
| )) |> filter(year > max(df1$year, na.rm = TRUE)) | |
| ts <- prepare_ts(df1, df2) | |
| generate_chart_facet(ts, "fitted", "chart1.png", "UCD", "line") | |
| generate_chart_facet( | |
| ts, "excess", "chart2.png", "UCD", "bar" | |
| ) | |
| # Age Standardized | |
| age_groups <- ts |> filter(!age_group %in% c("all", "NS", NA)) | |
| std_pop <- get_usa2000_bins(unique(age_groups$age_group)) | |
| ts <- ts |> | |
| inner_join(std_pop) |> | |
| mutate(mortality = mortality * weight) |> | |
| as_tibble() |> | |
| group_by(date, year) |> | |
| summarize(mortality = sum(mortality, na.rm = TRUE)) |> | |
| ungroup() |> | |
| as_tsibble(index = date) | |
| generate_chart(ts, "fitted", "chart3.png", "UCD", "line") | |
| # MCD | |
| df1 <- load_data( | |
| "https://apify.mortality.watch/cdc-wonder/month-5y-mcd-neoplasm.txt" | |
| ) | |
| df2 <- load_data(paste0( | |
| "https://apify.mortality.watch/cdc-wonder/", | |
| "provisional-month-5y-mcd-neoplasm.txt" | |
| )) |> filter(year > max(df1$year, na.rm = TRUE)) | |
| ts <- prepare_ts(df1, df2) | |
| generate_chart_facet(ts, "fitted", "chart4.png", "MCD", "line") | |
| generate_chart_facet( | |
| ts, "excess", "chart5.png", "MCD", "bar" | |
| ) | |
| # Age Standardized | |
| age_groups <- ts |> filter(!age_group %in% c("all", "NS", NA)) | |
| std_pop <- get_usa2000_bins(unique(age_groups$age_group)) | |
| ts <- ts |> | |
| inner_join(std_pop) |> | |
| mutate(mortality = mortality * weight) |> | |
| as_tibble() |> | |
| group_by(date, year) |> | |
| summarize(mortality = sum(mortality, na.rm = TRUE)) |> | |
| ungroup() |> | |
| as_tsibble(index = date) | |
| generate_chart(ts, "fitted", "chart6.png", "MCD", "line") |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://www.mortality.watch/charts/list.html#age-standardized--crude-cancer-deaths-by-month--5y-age-group-usa