Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active October 20, 2025 19:52
Show Gist options
  • Save USMortality/7067b000c2275505910c8dc8c959b36d to your computer and use it in GitHub Desktop.
Save USMortality/7067b000c2275505910c8dc8c959b36d to your computer and use it in GitHub Desktop.
Hurricanes [USA]
# Make sure pdftools is installed, then attach it.
# requireNamespace() only tests whether the package can be loaded (it does not
# attach it), so attaching happens exactly once, in the library() call below.
# library() also fails loudly if the installation did not succeed.
if (!requireNamespace("pdftools", quietly = TRUE)) {
  install.packages(
    "pdftools",
    repos = "https://cloud.r-project.org", quiet = TRUE
  )
}
library(pdftools)
library(dplyr)
library(ggplot2)
library(tidyr)
library(scales)
# Output geometry: a 600 x 335 base canvas multiplied by the scale factor `sf`.
# `sf`, `width` and `height` are reused further down when the charts are saved.
sf <- 2
width <- sf * 600
height <- sf * 335
# Store the same pixel size, plus a resolution of 72 times `sf`, in the
# `vsc.dev.args` option.
# NOTE(review): presumably read by the VS Code R plot device - confirm.
options(
  vsc.dev.args = list(width = width, height = height, res = sf * 72)
)
# Download the PDF and extract its text (one string per page).
pdf_url <- "https://www.nhc.noaa.gov/pdf/hurricaneStrikesByDecade.pdf"
pdf_file <- tempfile(fileext = ".pdf")
download.file(pdf_url, pdf_file, mode = "wb", quiet = TRUE)
# Named `pdf_pages` so the variable does not shadow pdftools::pdf_text().
pdf_pages <- pdftools::pdf_text(pdf_file)
# The text is in memory now, so the temporary download can be removed.
unlink(pdf_file)
if (length(pdf_pages) == 0) {
  stop("No text could be extracted from ", pdf_url, call. = FALSE)
}

# Parse the table from the PDF text. Only the first page is read.
lines <- strsplit(pdf_pages[[1]], "\n")[[1]]

# Keep rows that start with a year range (format: YYYY-YYYY) and capture both
# years so the length of the range can be checked.
range_pattern <- "^\\s*(\\d{4})-(\\d{4}).*$"
range_lines <- lines[grepl(range_pattern, lines)]
start_year <- as.integer(sub(range_pattern, "\\1", range_lines))
end_year <- as.integer(sub(range_pattern, "\\2", range_lines))

# Summary rows like "1901-2024" span more than 10 years. Filtering on the span
# rather than on that literal label keeps working when the end year changes.
is_decade <- (end_year - start_year) < 10
is_average <- grepl("Average", range_lines, fixed = TRUE)
data_lines <- range_lines[is_decade & !is_average]
if (length(data_lines) == 0) {
  stop(
    "No decade rows found in ", pdf_url, "; the PDF layout may have changed.",
    call. = FALSE
  )
}
# Turn one whitespace-separated table row into a one-row data frame.
# Field 1 is the decade label, field 7 the "All" column and field 8 the
# "Major" column.
parse_decade_row <- function(row_text) {
  fields <- unlist(strsplit(trimws(row_text), "\\s+"))
  data.frame(
    decade = fields[1],
    hurricanes = as.integer(fields[7]),
    major_hurricanes = as.integer(fields[8])
  )
}

# Stack the parsed rows, then add a numeric x position for plotting:
# the first year of the decade label plus 5.
df <- bind_rows(lapply(data_lines, parse_decade_row))
df <- mutate(
  df,
  decade_midpoint = as.numeric(sub("-.*", "", decade)) + 5
)
# Scatter plot of one count column per decade with a smoothed trend line.
#
# data:    data frame with `decade` ("YYYY-YYYY" labels), `decade_midpoint`
#          (numeric x position) and the column named by `y_col`.
# y_col:   name of the column to plot on the y axis, as a string.
# title:   plot title.
# y_label: y-axis label.
# Returns the ggplot object.
plot_decade_trend <- function(data, y_col, title, y_label) {
  # An unfinished decade (e.g. "2021-2024") is still drawn as a point but is
  # left out of the smoother. It is detected by its span instead of a
  # hard-coded label, so the check stays valid when the last row changes.
  start_year <- as.integer(sub("-.*", "", data$decade))
  end_year <- as.integer(sub(".*-", "", data$decade))
  complete <- data[which(end_year - start_year >= 9), , drop = FALSE]

  # x is the numeric midpoint so geom_smooth() can fit a curve; the axis
  # breaks are relabelled with the original decade strings.
  ggplot(data, aes(x = decade_midpoint, y = .data[[y_col]])) +
    geom_point() +
    geom_smooth(data = complete) +
    scale_x_continuous(breaks = data$decade_midpoint, labels = data$decade) +
    # NOTE(review): pretty_breaks(0) asks for zero intervals - confirm intent.
    scale_y_continuous(
      breaks = scales::pretty_breaks(0), limits = c(0, NA)
    ) +
    labs(
      title = title,
      subtitle = "Source: https://www.nhc.noaa.gov/pastdec.shtml",
      x = "Decade", y = y_label
    ) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Write `plot` to `filename` as a PNG, using the script-level `width`,
# `height` and `sf` settings.
save_chart <- function(plot, filename) {
  invisible(ggplot2::ggsave(
    filename = filename, plot = plot, width = width, height = height,
    units = "px", dpi = 72 * sf, device = grDevices::png, type = c("cairo")
  ))
}

chart <- plot_decade_trend(
  df, "hurricanes",
  title = "U.S. Hurricanes Over the Years",
  y_label = "Hurricanes"
)
save_chart(chart, "chart1.png")

chart <- plot_decade_trend(
  df, "major_hurricanes",
  title = "Major U.S. Hurricanes (3+) Over the Years",
  y_label = "Major Hurricanes"
)
save_chart(chart, "chart2.png")
@USMortality
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment