Last active
August 18, 2025 16:31
-
-
Save chasemc/fda670445f760037d007b03848668417 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load libraries | |
library(data.table) | |
library(ggplot2) | |
library(lubridate) # for month() | |
# https://reporter.nih.gov/search/43gg1_mEQUWulgAiysCfEg/projects?shared=true | |
# Load data | |
data <- fread('SearchResult_Export_06Jul2025_102053.csv') | |
data <- data[!duplicated(`Serial Number`)] | |
# Convert 'Award Notice Date' to Date | |
data[, `Award Notice Date` := as.Date(`Award Notice Date`, format = "%m/%d/%Y")] | |
# Extract Year and Month | |
data[, Year := format(`Award Notice Date`, "%Y")] | |
data[, Month_Num := month(`Award Notice Date`)] # numeric month | |
# Aggregate number of awards by Year and Month | |
monthly_counts <- data[, .N, by = .(Year, Month_Num)] | |
# Ensure correct month order | |
setorder(monthly_counts, Month_Num) | |
# Flag 2025 for bolding | |
monthly_counts[, LineSize := ifelse(Year == "2025", 5.5, 0.8)] | |
monthly_counts[, PointSize := ifelse(Year == "2025", 3, 1.5)] | |
# remove years before 2023 | |
monthly_counts <- monthly_counts[Year >= 2023] | |
# Plot | |
ggplot(monthly_counts, aes(x = Month_Num, y = N, group = Year, color = Year)) + | |
geom_line(aes(size = LineSize)) + | |
scale_x_continuous(breaks = 1:12, labels = month.abb) + | |
scale_size_identity() + | |
labs(title = "NIH R01 New Year 1 Awards by Award Notice Date", | |
x = "", y = "Number of Awards") + | |
theme_minimal() + | |
theme( | |
plot.title = element_text(size = 24, hjust = 0.5, face = "bold"), | |
axis.title = element_text(size = 18, face = "bold"), | |
axis.text = element_text(size = 14), | |
legend.title = element_text(size = 16, face = "bold"), | |
legend.text = element_text(size = 14), | |
legend.key.width = unit(2, "cm") | |
) + | |
scale_color_manual(values = c("2023" = "#E69F00", "2024" = "#56B4E9", "2025" = "#009E73")) + | |
theme(axis.text.x = element_text(size = 18), | |
axis.text.y = element_text(size = 18), | |
axis.title.y = element_text(size = 22, face = "bold")) + | |
guides(color = guide_legend(override.aes = list(linewidth = 5))) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
library(ggplot2) | |
library(lubridate) | |
# Load data | |
# https://reporter.nih.gov/search/DNC_QozdAkOzNk372Q_MOQ/projects?shared=true | |
data <- fread('/home/chase/Downloads/SearchResult_Export_05Jul2025_091110.csv') | |
data <- data[!duplicated(`Serial Number`)] | |
# Convert 'Award Notice Date' to Date | |
data[, `Award Notice Date` := as.Date(`Award Notice Date`, format = "%m/%d/%Y")] | |
# Extract award_notice_year and Month | |
data[, award_notice_year := as.integer(format(`Award Notice Date`, "%Y"))] | |
data[, award_notice_month := month(`Award Notice Date`)] | |
# Aggregate number of awards by award_notice_year and Month | |
monthly_counts <- data[, .N, by = .(award_notice_year, award_notice_month)] | |
# Split data | |
historic_data <- monthly_counts[award_notice_year >= 2010 & award_notice_year <= 2024] | |
data_2025 <- monthly_counts[award_notice_year == 2025] | |
# Calculate mean and sd for each month (historic) | |
summary_stats <- historic_data[, .( | |
mean_n = mean(N), | |
sd_n = sd(N) | |
), by = award_notice_month] | |
# Prepare data for ribbons with legend | |
ribbon_data <- rbindlist(list( | |
summary_stats[, .(award_notice_month, ymin = mean_n - 1 * sd_n, ymax = mean_n + 1 * sd_n, sd_level = "±1 SD")], | |
summary_stats[, .(award_notice_month, ymin = mean_n - 2 * sd_n, ymax = mean_n + 2 * sd_n, sd_level = "±2 SD")], | |
summary_stats[, .(award_notice_month, ymin = mean_n - 3 * sd_n, ymax = mean_n + 3 * sd_n, sd_level = "±3 SD")] | |
)) | |
# Ensure sd_level order | |
ribbon_data[, sd_level := factor(sd_level, levels = c("±3 SD", "±2 SD", "±1 SD"))] | |
# Plot with legend for shading | |
ggplot() + | |
geom_ribbon(data = ribbon_data, | |
aes(x = award_notice_month, ymin = ymin, ymax = ymax, fill = sd_level), | |
alpha = 0.4) + | |
geom_line(data = summary_stats, | |
aes(x = award_notice_month, y = mean_n), | |
color = "blue", size = 1) + | |
geom_line(data = data_2025, | |
aes(x = award_notice_month, y = N), | |
color = "red", size = 1.5) + | |
scale_x_continuous(breaks = 1:12, | |
labels = month.abb) + | |
scale_fill_manual(values = c("±3 SD" = "grey80", "±2 SD" = "grey60", "±1 SD" = "grey40")) + | |
labs( | |
title = "NIH SBIR Phase 1 Awards by Month\n(2010 to 2024 Avg ± 1, 2, 3 SD vs. 2025)", | |
x = "", | |
y = "Number of Awards", | |
fill = "Shading Bands" | |
) + | |
theme_minimal() + | |
theme( | |
plot.title = element_text(size = 24, hjust = 0.5, face = "bold"), | |
axis.title = element_text(size = 18, face = "bold"), | |
axis.text = element_text(size = 14), | |
legend.title = element_text(size = 16, face = "bold"), | |
legend.text = element_text(size = 14), | |
legend.key.width = unit(2, "cm") | |
) + | |
theme(axis.text.x = element_text(size = 18), | |
axis.text.y = element_text(size = 18), | |
axis.title.y = element_text(size = 22, face = "bold")) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.