Last active
October 20, 2025 19:52
-
-
Save USMortality/7067b000c2275505910c8dc8c959b36d to your computer and use it in GitHub Desktop.
Hurricanes [USA]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dependencies ----
# pdftools is installed on demand. requireNamespace() only checks whether the
# package is available (it does not attach it), so the library() call below
# does the attaching and fails loudly if the installation did not succeed.
if (!requireNamespace("pdftools", quietly = TRUE)) {
  install.packages(
    "pdftools",
    repos = "https://cloud.r-project.org", quiet = TRUE
  )
}
library(pdftools)
library(dplyr)
library(ggplot2)
library(tidyr)
library(scales)
# Output geometry ----
# `sf` is a scale factor applied to a 600 x 335 px base size and to the 72 dpi
# base resolution; `width`, `height` and `sf` are reused when saving charts.
sf <- 2
width <- sf * 600
height <- sf * 335
# Hand the same size and resolution to the `vsc.dev.args` option.
options(
  vsc.dev.args = list(
    width = width,
    height = height,
    res = sf * 72
  )
)
# Download the source PDF ----
# Fetch the NHC "hurricane strikes by decade" PDF into a temporary file and
# extract its text.
pdf_url <- "https://www.nhc.noaa.gov/pdf/hurricaneStrikesByDecade.pdf"
pdf_file <- tempfile(fileext = ".pdf")
# mode = "wb" requests a binary transfer so the PDF bytes are written as-is.
download_status <- download.file(pdf_url, pdf_file, mode = "wb", quiet = TRUE)
# download.file() reports failure through a non-zero return code; stop here
# instead of handing a missing or partial file to the PDF parser.
if (!identical(as.integer(download_status), 0L)) {
  stop(
    "Download failed (status ", download_status, "): ", pdf_url,
    call. = FALSE
  )
}
# The call is namespace-qualified because the result is stored under the same
# name as the pdftools function.
pdf_text <- pdftools::pdf_text(pdf_file)
# Parse the decade table out of the extracted PDF text ----
# Only the first element of `pdf_text` (the first page) is read.
lines <- strsplit(pdf_text, "\n")[[1]]

# Select the table rows that start with a "YYYY-YYYY" range covering at most
# ten years. Longer ranges are summary rows (e.g. "1901-2024"); filtering on
# the span rather than on a literal label keeps working when the source table
# is extended. Rows mentioning "Average" are dropped as well.
select_decade_lines <- function(lines) {
  range_pattern <- "^\\s*(\\d{4})-(\\d{4}).*$"
  candidates <- lines[grepl(range_pattern, lines)]
  candidates <- candidates[!grepl("Average", candidates)]
  first_year <- as.integer(sub(range_pattern, "\\1", candidates))
  last_year <- as.integer(sub(range_pattern, "\\2", candidates))
  candidates[last_year - first_year < 10L]
}

# Turn one table row into a one-row data frame.
# The row is split on whitespace: field 1 is the decade label, field 7 the
# "All" count and field 8 the "Major" count.
parse_decade_line <- function(line) {
  parts <- strsplit(trimws(line), "\\s+")[[1]]
  if (length(parts) < 8L) {
    # Out-of-range fields come back as NA; say so instead of staying silent.
    warning(
      "Row has fewer than 8 fields; counts may be NA: ", line,
      call. = FALSE
    )
  }
  data.frame(
    decade = parts[1],
    hurricanes = as.integer(parts[7]), # "All" column
    major_hurricanes = as.integer(parts[8]) # "Major" column
  )
}

data_lines <- select_decade_lines(lines)
if (length(data_lines) == 0L) {
  stop(
    "No decade rows found in the PDF text; the layout may have changed.",
    call. = FALSE
  )
}

df <- data_lines |>
  lapply(parse_decade_line) |>
  bind_rows() |>
  # Numeric x position for plotting: first year of the range plus five.
  mutate(decade_midpoint = as.numeric(sub("-.*", "", decade)) + 5)
# Charts ----

# TRUE for "YYYY-YYYY" labels that span a full ten-year decade
# (e.g. "2011-2020"), FALSE for shorter ranges such as "2021-2024".
is_full_decade <- function(decade) {
  first_year <- as.integer(sub("-.*", "", decade))
  last_year <- as.integer(sub(".*-", "", decade))
  last_year - first_year >= 9L
}

# Scatter plot of one count column per decade with a smoothed trend line.
#   data     data frame with `decade`, `decade_midpoint` and the count column
#   y        unquoted name of the count column to plot
#   title    plot title
#   y_label  y-axis label
# Returns the ggplot object.
plot_decade_trend <- function(data, y, title, y_label) {
  # The numeric midpoint positions the points and the smoother, while the
  # axis is labelled with the original decade strings.
  ggplot(data, aes(x = decade_midpoint, y = {{ y }})) +
    geom_point() +
    # The trend is fitted on full decades only; a partial decade covers fewer
    # years of counts than the others. The point itself is still drawn.
    geom_smooth(data = dplyr::filter(data, is_full_decade(decade))) +
    scale_x_continuous(breaks = data$decade_midpoint, labels = data$decade) +
    # NOTE(review): pretty_breaks() is called with n = 0; confirm the
    # resulting y-axis ticks are the intended ones.
    scale_y_continuous(breaks = scales::pretty_breaks(0), limits = c(0, NA)) +
    labs(
      title = title,
      subtitle = "Source: https://www.nhc.noaa.gov/pastdec.shtml",
      x = "Decade", y = y_label
    ) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Write a chart to a PNG file using the script-level `width`, `height` (in
# pixels) and scale factor `sf`.
save_chart <- function(plot, filename) {
  ggplot2::ggsave(
    filename = filename, plot = plot, width = width, height = height,
    units = "px", dpi = 72 * sf, device = grDevices::png, type = "cairo"
  )
}

# All hurricanes per decade.
chart <- plot_decade_trend(
  df, hurricanes,
  title = "U.S. Hurricanes Over the Years",
  y_label = "Hurricanes"
)
save_chart(chart, "chart1.png")

# Major hurricanes per decade.
chart <- plot_decade_trend(
  df, major_hurricanes,
  title = "Major U.S. Hurricanes (3+) Over the Years",
  y_label = "Major Hurricanes"
)
save_chart(chart, "chart2.png")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://www.mortality.watch/charts/list.html#hurricanes-usa