Created
November 9, 2023 21:00
-
-
Save allaway/f1082f40adc17ba3a41a3ac0247fdecf to your computer and use it in GitHub Desktop.
Plot expected data for NF Data Portal using IncomingData table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(synapser) | |
library(lubridate) | |
library(tidyverse) | |
##datetime in unix time, integer | |
after_date <- 1704067200L | |
synLogin() | |
incoming_data <- synTableQuery('SELECT * FROM syn51471723')$filepath %>% | |
read_csv() %>% | |
mutate(date_uploadestimate = lubridate::parse_date_time(date_uploadestimate, orders = 'mdy')) %>% | |
filter(date_uploadestimate > as_datetime(after_date)) | |
summarized_data <- incoming_data %>% | |
group_by(assay, fundingAgency) %>% | |
summarize(estimated_samples = sum(estimatedMinNumSamples), estimated_files = sum(estimatedNumFiles)) %>% | |
mutate(estimated_samples = case_when(is.na(estimated_samples) ~ 0, | |
!is.na(estimated_samples) ~ estimated_samples)) %>% | |
mutate(estimated_files = case_when(is.na(estimated_files) ~ 0, | |
!is.na(estimated_files) ~ estimated_files)) %>% | |
ungroup() %>% | |
group_by(assay) %>% | |
mutate(estimated_samples_all = sum(estimated_samples), estimated_files_all = sum(estimated_files)) %>% | |
ungroup() %>% | |
mutate(assay_reordered_files = forcats::fct_reorder(assay, desc(estimated_files_all), .na_rm = T)) %>% | |
mutate(assay_reordered_samples = forcats::fct_reorder(assay, desc(estimated_samples_all), .na_rm = T)) %>% | |
slice_max(estimated_files_all, n =20) | |
max_date <- max(incoming_data$date_uploadestimate) | |
a <- ggplot(summarized_data) + | |
geom_bar(aes(x = assay_reordered_files, y = estimated_files, fill = fundingAgency), stat = 'identity') + | |
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + | |
labs(x = "Assay", y = "Estimated number of Files", | |
title = paste0("Current expected data deposits between ", | |
lubridate::round_date(as_datetime(after_date), unit = 'day'), | |
' and ', | |
lubridate::round_date(as_datetime(max_date), unit = 'day') | |
)) | |
b <- ggplot(summarized_data) + | |
geom_bar(aes(x = assay_reordered_files, y = estimated_samples, fill = fundingAgency), stat = 'identity') + | |
theme(axis.text.x = element_text(angle = 45, hjust = 1))+ | |
labs(x = "Assay", y = "Estimated number of Samples\n(e.g patients, mice, biospecimens, etc.)", | |
title = paste0("Current expected data deposits between ", | |
lubridate::round_date(as_datetime(after_date), unit = 'day'), | |
' and ', | |
lubridate::round_date(as_datetime(max_date), unit = 'day') | |
)) | |
cowplot::plot_grid(a, b, align="hv", axis = "l", ncol = 1) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's a related version of this that we used to produce some plots for the 2023 NTAP report with LLM comments ;)