Skip to content

Instantly share code, notes, and snippets.

@allaway
Last active November 10, 2023 17:15
Show Gist options
  • Select an option

  • Save allaway/4422a306adf5ae9142b18a973d8f09d1 to your computer and use it in GitHub Desktop.

Select an option

Save allaway/4422a306adf5ae9142b18a973d8f09d1 to your computer and use it in GitHub Desktop.
Store schematic manifests to Synapse (NF tools central)
library(googlesheets4)
library(googledrive)
library(synapser)
library(dplyr)
library(readr)
library(purrr)
library(stringr)
# Configuration: the Synapse project used as the storage location, and the
# Google Drive folder whose sub-folders are scanned for manifests.
storage_project <- "syn51710208"
drive_url <- "https://drive.google.com/drive/u/1/folders/1yblPMk-kgMj5KJi7P0AFOobZ9vkl6Nkk"

# Log in to Synapse (no explicit credentials are passed here).
synLogin()

# Ids of the folders listed under the Drive folder.
ids <- drive_ls(path = drive_url, type = "folder")[["id"]]
# List the files inside each Drive sub-folder and flatten the result into a
# single vector of file ids.
#
# Each folder is listed exactly once (the listing is reused for both the
# progress print and the return value), and lapply() + unlist() always yields
# a flat vector no matter how many files each folder holds.
csv_ids <- lapply(as_id(ids), function(x) {
  file_ids <- drive_ls(x)$id
  print(file_ids) # progress output: ids found in this folder
  file_ids
}) %>%
  unlist(use.names = FALSE)
# Fail early with a clear message instead of an obscure "object 'name' not
# found" error further down when no files were listed.
if (length(csv_ids) == 0) {
  stop("No manifest files were found in the Drive folders.", call. = FALSE)
}

# Download every listed file into the working directory (exported as CSV where
# a conversion applies). The result is a list with one dribble per file.
# NOTE(review): downloads overwrite existing local files of the same name, so
# identically named files from different folders would collide; confirm that
# manifest names are unique across folders.
manifests <- lapply(csv_ids, function(y) {
  googledrive::drive_download(as_id(y), type = "csv", overwrite = TRUE)
})

# One row per downloaded file, with plain (non-list) name/id/local_path
# columns. `component` is the file name with everything up to and including
# the last underscore removed, and then the first ".manifest" removed.
manifests_df <- lapply(manifests, function(d) {
  tibble(
    name = d$name,
    id = as.character(d$id),
    local_path = d$local_path,
    drive_resource = d$drive_resource
  )
}) %>%
  bind_rows() %>%
  mutate(component = stringr::str_remove(name, ".+_")) %>%
  mutate(component = stringr::str_remove(component, "\\.manifest"))
# Concatenate the CSVs that share a component value.
#
# For every component, all of its distinct local files are read with every
# column as character, stacked row-wise, and written to
# "<component>_concatenated.csv" in the working directory (NA written as an
# empty string). The result has one row per component with the path of the
# combined file.
concatenated_csvs <- manifests_df %>%
  group_by(component) %>%
  summarise(local_path_list = list(unique(local_path))) %>%
  mutate(
    # map2() (rather than map2_chr()) deliberately keeps `concatenated_path`
    # a list column; row-wise consumers of this table may depend on that.
    concatenated_path = map2(local_path_list, component, function(paths, comp) {
      # Read all files first and bind once, instead of growing a data frame
      # inside a loop (which re-copies the accumulated rows on every pass).
      combined_df <- map(paths, function(p) {
        read_csv(p, col_types = cols(.default = "c"))
      }) %>%
        bind_rows()

      concatenated_file <- paste0(comp, "_concatenated.csv")
      write_csv(combined_df, concatenated_file, na = "")
      concatenated_file
    })
  ) %>%
  ungroup() %>%
  select(component, concatenated_path)
## Create one folder per component; not necessary if they already exist.
# sapply(unique(manifests_df$component), function(x){
# synStore(synapser::Folder(name = x, parent = storage_project))
# })

# Gather the children of the storage project into a single data frame
# (presumably one row per child; the `name` and `id` columns are used later).
folders <- bind_rows(synGetChildren(storage_project)$asList())
# Upload each concatenated manifest to the child of the storage project whose
# name equals the component, always under the name
# "synapse_storage_manifest.csv".
#
# pmap() iterates over the rows directly, so it does not depend on apply()'s
# data-frame-to-matrix coercion, and it works whether `concatenated_path` is a
# list column or a character column.
pmap(concatenated_csvs, function(component, concatenated_path, ...) {
  folder_id <- folders$id[which(folders$name == component)]

  # Refuse to upload when the destination is missing or ambiguous, rather
  # than passing a zero-length or multi-value parent to synStore().
  if (length(folder_id) != 1) {
    stop(
      "Expected exactly one child of ", storage_project, " named '",
      component, "', found ", length(folder_id), ".",
      call. = FALSE
    )
  }

  synStore(File(
    path = as.character(concatenated_path),
    parent = folder_id,
    name = "synapse_storage_manifest.csv"
  ))
})
# Scrub all files from the project. Do NOT run this unless you want to delete everything.
# childs <- synGetChildren('syn51710208')$asList()
# sapply(childs, function(x){synDelete(x$id)})
@allaway
Copy link
Author

allaway commented Nov 8, 2023

Note: this currently points at a private testing project.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment