Skip to content

Instantly share code, notes, and snippets.

@allaway
Created August 17, 2023 21:57
Show Gist options
  • Save allaway/0edc92ae2ad6b4d40be21b6feffa2903 to your computer and use it in GitHub Desktop.
Save allaway/0edc92ae2ad6b4d40be21b6feffa2903 to your computer and use it in GitHub Desktop.
nftc name mapping
library(synapser)
library(tidyverse)
synLogin()
#' Rename the columns of a data frame from a lookup vector.
#'
#' @param data_frame A data frame or tibble.
#' @param column_name_mapping Named character vector: each name is the new
#'   column name and each value is the existing column name.
#' @return `data_frame` with the listed columns renamed.
rename_columns <- function(data_frame, column_name_mapping) {
  # Splice the lookup so every element is passed as a `new = "old"` pair.
  dplyr::rename(data_frame, !!!column_name_mapping)
}
#' Convert a comma-separated text column into JSON array strings.
#'
#' Every non-missing value of `column_name` is split on ",", each piece is
#' whitespace-trimmed, and the pieces are serialised as a JSON array
#' (a single value still yields a one-element array). Missing values stay
#' `NA_character_`.
#'
#' @param data A data frame or tibble.
#' @param column_name Single non-empty string naming a column of `data`.
#' @return `data` with `column_name` replaced by JSON array strings.
convert_to_json_array <- function(data, column_name) {
  # Reject NA / non-scalar names up front; `NA == ""` would otherwise fail
  # inside `if` with an unhelpful message.
  if (!is.character(column_name) || length(column_name) != 1L ||
      is.na(column_name)) {
    stop("The column name must be a single string.")
  }
  # Make sure the column name is non-empty
  if (column_name == "") {
    stop("The column name must be non-empty.")
  }
  # Check if the column exists
  if (!column_name %in% names(data)) {
    stop(paste("Column", column_name, "does not exist in the dataframe."))
  }

  to_json_array <- function(value) {
    if (is.na(value)) {
      return(NA_character_)
    }
    pieces <- stringr::str_trim(stringr::str_split(value, ",")[[1]])
    # A list (not a character vector) keeps single values as arrays even
    # with auto_unbox = TRUE. toJSON is qualified because jsonlite is not
    # attached by library(tidyverse); as.character() drops the "json" class
    # so map_chr receives a plain string.
    as.character(jsonlite::toJSON(as.list(pieces), auto_unbox = TRUE))
  }

  dplyr::mutate(
    data,
    "{column_name}" := purrr::map_chr(.data[[column_name]], to_json_array)
  )
}
#### RESOURCES
# Synapse IDs of the source CSV and of the destination table.
resource_csv <- "syn51717834"
resource_table <- "syn26450069"

# Read the file at the path returned by synGet with the given column types,
# then drop the Component column.
resource_data <- read_csv(
  synGet(resource_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "resourceId" = "Resource_id",
  "geneticReagentId" = "Genetic Reagent_id",
  "antibodyId" = "Antibody_id",
  "cellLineId" = "Cell Line_id",
  "animalModelId" = "Animal Model_id",
  "biobankId" = "Biobank_id",
  "rrid" = "rrid",
  "resourceName" = "Resource Name",
  "synonyms" = "Synonyms",
  "resourceType" = "Resource Type",
  "description" = "Description",
  "mTARequired" = "MTA Required",
  "usageRequirements" = "Usage Requirements",
  "dateAdded" = "Date Added",
  "dateModified" = "Date Modified",
  "howToAcquire" = "How To Acquire"
)

# Rename, turn the two list-like columns into JSON arrays, and multiply the
# date columns by 1000.
# NOTE(review): presumably epoch seconds -> milliseconds; confirm the units.
resource_data_2 <- resource_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("synonyms") %>%
  convert_to_json_array("usageRequirements") %>%
  mutate(
    dateAdded = as.integer(dateAdded) * 1000,
    dateModified = as.integer(dateModified) * 1000
  )

# Manual override of the synonyms value in row 78.
# NOTE(review): the row is addressed by position, so this depends on the
# CSV row order staying the same.
resource_data_2$synonyms[78] <- '["ipNF95.11b C","ipNF95.11bC"]'

# Move one row to the end of the table, three times in sequence. Each index
# refers to the frame produced by the previous move, not the original order.
# NOTE(review): the reason for the reordering is not visible in this script.
resource_data_2 <- bind_rows(
  slice(resource_data_2, -50),
  slice(resource_data_2, 50)
)
resource_data_2 <- bind_rows(
  slice(resource_data_2, -543),
  slice(resource_data_2, 543)
)
resource_data_2 <- bind_rows(
  slice(resource_data_2, -986),
  slice(resource_data_2, 986)
)

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {resource_table}"))
# synDelete(table_res)
synStore(Table(resource_table, resource_data_2))
#### ANIMAL MODELS
# Synapse IDs of the source CSV and of the destination table.
am_csv <- "syn51717836"
am_table <- "syn26486808"

# Read the file at the path returned by synGet and drop the Component column.
am_data <- read_csv(
  synGet(am_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "animalModelId" = "Animal Model_id",
  "donorId" = "Donor_id",
  "transplantationDonorId" = "Transplantation Donor_id",
  "animalState" = "Animal State",
  "backgroundStrain" = "Background Strain",
  "backgroundSubstrain" = "Background Substrain",
  "strainNomenclature" = "Strain Nomenclature",
  "generation" = "Generation",
  "transplantationType" = "Transplantation Type",
  "animalModelGeneticDisorder" = "Animal Model Genetic Disorder",
  "animalModelOfManifestation" = "Animal Model Manifestation"
)

# Rename, then convert the comma-separated columns to JSON array strings.
am_data_2 <- am_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("animalModelGeneticDisorder") %>%
  convert_to_json_array("animalModelOfManifestation")

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {am_table}"))
# synDelete(table_res)
synStore(Table(am_table, am_data_2))
#### CELL LINES
# Synapse IDs of the source CSV and of the destination table.
cl_csv <- "syn51717843"
cl_table <- "syn26486823"

# Read the file at the path returned by synGet and drop the Component column.
cl_data <- read_csv(
  synGet(cl_csv)$path,
  col_types = "ccccccccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "cellLineId" = "Cell Line_id",
  "donorId" = "Donor_id",
  "organ" = "Organ",
  "tissue" = "Tissue",
  "cellLineCategory" = "Cell Line Category",
  "originYear" = "Origin Year",
  "strProfile" = "Str Profile",
  "populationDoublingTime" = "Population Doubling Time",
  "resistance" = "Resistance",
  "contaminatedMisidentified" = "Contaminated Misidentified",
  "cellLineGeneticDisorder" = "Cell Line Disease",
  "modelOfManifestation" = "Cell Line Manifestation"
)

# Rename, then convert the comma-separated columns to JSON array strings.
cl_data_2 <- cl_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("cellLineGeneticDisorder") %>%
  convert_to_json_array("modelOfManifestation")

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {cl_table}"))
# synDelete(table_res)
synStore(Table(cl_table, cl_data_2))
#### ANTIBODIES
# Synapse IDs of the source CSV and of the destination table.
ab_csv <- "syn51717831"
ab_table <- "syn26486811"

# Read the file at the path returned by synGet and drop the Component column.
ab_data <- read_csv(
  synGet(ab_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "antibodyId" = "Antibody_id",
  "uniprotId" = "uniprotId",
  "targetAntigen" = "Target Antigen",
  "conjugate" = "Conjugate",
  "clonality" = "Clonality",
  "cloneId" = "cloneId",
  "reactiveSpecies" = "Reactive Species",
  "hostOrganism" = "Host Organism"
)

# Rename, then convert the comma-separated column to JSON array strings.
ab_data_2 <- ab_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("reactiveSpecies")

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {ab_table}"))
# synDelete(table_res)
synStore(Table(ab_table, ab_data_2))
#### GENETIC REAGENTS
# Synapse IDs of the source CSV and of the destination table.
gr_csv <- "syn51717849"
gr_table <- "syn26486832"

# Read the file at the path returned by synGet and drop the Component column.
# The mapping below needs 26 columns (plus Component), so a 17-code
# positional col_types string cannot cover the file; columns past the 17th
# would fall back to readr's type guessing. A character default reads every
# column as text regardless of how many there are.
gr_data <- read_csv(
  synGet(gr_csv)$path,
  col_types = cols(.default = col_character())
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
# NOTE(review): "3prime Clonin gSite" looks like a typo of
# "3prime Cloning Site", but it has to match the CSV header exactly for the
# rename to succeed, so it is kept as written — confirm against the file.
column_name_mapping <- c(
  "geneticReagentId" = "Genetic Reagent_id",
  "insertName" = "Insert Name",
  "insertEntrezId" = "Insert Entrez_id",
  "gRNAshRNAsequence" = "gRNA shRNA sequence",
  "insertSize" = "Insert Size",
  "insertSpecies" = "Insert Species",
  "nTerminalTag" = "nTerminal Tag",
  "cTerminalTag" = "cTerminal Tag",
  "cloningMethod" = "Cloning Method",
  "5primeCloningSite" = "5prime Cloning Site",
  "5primeSiteDestroyed" = "5prime Site Destroyed",
  "3primeCloningSite" = "3prime Clonin gSite",
  "3primeSiteDestroyed" = "3prime Site Destroyed",
  "promoter" = "Promoter",
  "5primer" = "5primer",
  "3primer" = "3primer",
  "vectorBackbone" = "Vector Backbone",
  "vectorType" = "Vector Type",
  "backboneSize" = "Backbone Size",
  "totalSize" = "Total Size",
  "bacterialResistance" = "Bacterial Resistance",
  "selectableMarker" = "Selectable Marker",
  "copyNumber" = "Copy Number",
  "growthTemp" = "Growth Temp",
  "growthStrain" = "Growth Strain",
  "hazardous" = "Hazardous"
)

# Rename, then convert the comma-separated columns to JSON array strings.
gr_data_2 <- gr_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("insertSpecies") %>%
  convert_to_json_array("vectorType")

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {gr_table}"))
# synDelete(table_res)
synStore(Table(gr_table, gr_data_2))
#### BIOBANKS
# Synapse IDs of the source CSV and of the destination table.
bb_csv <- "syn51717842"
bb_table <- "syn26486821"

# Read the file at the path returned by synGet and drop the Component column.
bb_data <- read_csv(
  synGet(bb_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "biobankId" = "Biobank_id",
  "resourceId" = "Resource_id",
  "diseaseType" = "Disease Type",
  "biobankURL" = "Biobank URL",
  "biobankName" = "Biobank Name",
  "specimenPreparationMethod" = "Specimen Preparation Method",
  "specimenType" = "Specimen Type",
  "tumorType" = "Tumor Type",
  "specimenFormat" = "Specimen Format",
  "specimenTissueType" = "Specimen Tissue Type"
)

# Rename, then convert each comma-separated column to JSON array strings.
bb_data_2 <- bb_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("diseaseType") %>%
  convert_to_json_array("specimenPreparationMethod") %>%
  convert_to_json_array("specimenType") %>%
  convert_to_json_array("tumorType") %>%
  convert_to_json_array("specimenFormat") %>%
  convert_to_json_array("specimenTissueType")

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {bb_table}"))
# synDelete(table_res)
synStore(Table(bb_table, bb_data_2))
#### VENDOR
# Synapse IDs of the source CSV and of the destination table.
vendor_csv <- "syn51717784"
vendor_table <- "syn26486850"

# Read the file at the path returned by synGet and drop the Component column.
vendor_data <- read_csv(
  synGet(vendor_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "vendorId" = "Vendor_id",
  "vendorUrl" = "Vendor Url",
  "vendorName" = "Vendor Name"
)

vendor_data_2 <- vendor_data %>%
  rename_columns(column_name_mapping)

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {vendor_table}"))
# synDelete(table_res)
synStore(Table(vendor_table, vendor_data_2))
#### VENDORITEM
# Synapse IDs of the source CSV and of the destination table.
vendoritem_csv <- "syn51717835"
vendoritem_table <- "syn26486843"

# Read the file at the path returned by synGet and drop the Component column.
vendoritem_data <- read_csv(
  synGet(vendoritem_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "vendorItemId" = "Vendor Item_id",
  "resourceId" = "Resource_id",
  "vendorId" = "Vendor_id",
  "catalogNumber" = "Catalog Number",
  "catalogNumberURL" = "Catalog Number URL"
)

vendoritem_data_2 <- vendoritem_data %>%
  rename_columns(column_name_mapping)

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {vendoritem_table}"))
# synDelete(table_res)
synStore(Table(vendoritem_table, vendoritem_data_2))
#### USAGE
# Three source CSVs are read one after another; `usage_csv` is reassigned
# before each read. Each file is read from the path returned by synGet and
# has its Component column dropped.
usage_csv <- "syn51717841"
usage_data_am <- read_csv(
  synGet(usage_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

usage_csv <- "syn51717847"
usage_data_cl <- read_csv(
  synGet(usage_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

usage_csv <- "syn51717833"
usage_data_ab <- read_csv(
  synGet(usage_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Stack the three frames in the order ab, cl, am.
usage_data <- bind_rows(usage_data_ab, usage_data_cl, usage_data_am)

# Synapse ID of the destination table.
usage_table <- "syn26486841"

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSVs.
column_name_mapping <- c(
  "usageId" = "Usage_id",
  "publicationId" = "Publication_id",
  "resourceId" = "Resource_id"
)

usage_data_2 <- usage_data %>%
  rename_columns(column_name_mapping)

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {usage_table}"))
# synDelete(table_res)
synStore(Table(usage_table, usage_data_2))
#### RESOURCEAPPLICATION
# Synapse IDs of the source CSV and of the destination table.
resourceapplication_csv <- "syn51717832"
resourceapplication_table <- "syn26486840"

# Read the file at the path returned by synGet and drop the Component column.
resourceapplication_data <- read_csv(
  synGet(resourceapplication_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "resourceApplicationId" = "Resource Application_id",
  "resourceId" = "Resource_id",
  "source" = "Source",
  "links" = "Links",
  "applications" = "Applications"
)

# Rename, then convert the comma-separated column to JSON array strings.
resourceapplication_data_2 <- resourceapplication_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("applications")

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {resourceapplication_table}"))
# synDelete(table_res)
synStore(Table(resourceapplication_table, resourceapplication_data_2))
#### PUBLICATION
# Synapse IDs of the source CSV and of the destination table.
publication_csv <- "syn51717783"
publication_table <- "syn26486839"

# Read the file at the path returned by synGet and drop the Component column.
publication_data <- read_csv(
  synGet(publication_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "publicationId" = "Publication_id",
  "doi" = "doi",
  "pmid" = "pmid",
  "abstract" = "Abstract",
  "journal" = "Journal",
  "publicationDate" = "Publication Date",
  "citation" = "citation",
  "publicationDateUnix" = "Publication Date Unix",
  "authors" = "Authors",
  "publicationTitle" = "Publication Title"
)

# Rename, then convert the comma-separated column to JSON array strings.
publication_data_2 <- publication_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("authors")

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {publication_table}"))
# synDelete(table_res)
synStore(Table(publication_table, publication_data_2))
#### OBSERVATION
# Three source CSVs are read one after another; `observation_csv` is
# reassigned before each read. Each file is read from the path returned by
# synGet and has its Component column dropped.
observation_csv <- "syn51717839"
observation_data_am <- read_csv(
  synGet(observation_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

observation_csv <- "syn51717846"
observation_data_cl <- read_csv(
  synGet(observation_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

observation_csv <- "syn51717850"
observation_data_gr <- read_csv(
  synGet(observation_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Stack the three frames in the order am, gr, cl.
observation_data <- bind_rows(
  observation_data_am,
  observation_data_gr,
  observation_data_cl
)

# Synapse ID of the destination table.
observation_table <- "syn26486836"

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSVs.
column_name_mapping <- c(
  "observationId" = "Observation_id",
  "synapseId" = "Synapse_id",
  "resourceId" = "Resource_id",
  "easeofUseRating" = "Ease of Use Rating",
  "observationSubmitterName" = "Observation Submitter Name",
  "observationReference" = "Observation Reference",
  "observationText" = "Observation Text",
  "observationLink" = "Observation Link",
  "observationTime" = "Observation Time",
  "reliabilityRating" = "Reliability Rating",
  "observationType" = "Observation Type",
  "observationTimeUnits" = "Observation Time Units",
  "publicationId" = "Publication_id"
)

observation_data_2 <- observation_data %>%
  rename_columns(column_name_mapping)

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {observation_table}"))
# synDelete(table_res)
synStore(Table(observation_table, observation_data_2))
#### MUTATION DETAILS
# Synapse IDs of the source CSV and of the destination table.
mutationdetails_csv <- "syn51717782"
mutationdetails_table <- "syn26486835"

# Read the file at the path returned by synGet and drop the Component column.
mutationdetails_data <- read_csv(
  synGet(mutationdetails_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "mutationDetailsId" = "Mutation Details_id",
  "humanClinVarMutation" = "Human ClinVar Mutation",
  "alleleType" = "Allele Type",
  "affectedGeneSymbol" = "Affected Gene Symbol",
  "mutationMethod" = "Mutation Method",
  "externalMutationID" = "External Mutation ID",
  "affectedGeneName" = "Affected Gene Name",
  "sequenceVariation" = "Sequence Variation",
  "chromosome" = "Chromosome",
  "proteinVariation" = "Protein Variation",
  "animalModelMutation" = "Animal Model Mutation",
  "mutationType" = "Mutation Type"
)

# Rename, then convert each comma-separated column to JSON array strings.
mutationdetails_data_2 <- mutationdetails_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("alleleType") %>%
  convert_to_json_array("mutationType") %>%
  convert_to_json_array("mutationMethod")

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {mutationdetails_table}"))
# synDelete(table_res)
synStore(Table(mutationdetails_table, mutationdetails_data_2))
#### MUTATION
# Synapse IDs of the source CSV and of the destination table.
mutation_csv <- "syn51717781"
mutation_table <- "syn26486834"

# Read the file at the path returned by synGet and drop the Component column.
mutation_data <- read_csv(
  synGet(mutation_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "mutationId" = "Mutation_id",
  "mutationDetailsId" = "Mutation Details_id",
  "animalModelId" = "Animal Model_id",
  "cellLineId" = "Cell Line_id"
)

mutation_data_2 <- mutation_data %>%
  rename_columns(column_name_mapping)

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {mutation_table}"))
# synDelete(table_res)
synStore(Table(mutation_table, mutation_data_2))
#### INVESTIGATOR
# Synapse IDs of the source CSV and of the destination table.
investigator_csv <- "syn51717780"
investigator_table <- "syn26486833"

# Read the file at the path returned by synGet and drop the Component column.
investigator_data <- read_csv(
  synGet(investigator_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "investigatorId" = "Investigator_id",
  "investigatorSynapseId" = "Investigator SynapseId",
  "orcid" = "orcid",
  "institution" = "Institution",
  "investigatorWebsite" = "Investigator Website",
  "investigatorName" = "Investigator Name"
)

# Rename, then override the institution for one named investigator.
# `.default` keeps the existing institution for every other row, including
# rows whose investigatorName is NA; a pair of `==` / `!=` arms would both
# evaluate to NA for those rows and blank out their institution.
investigator_data_2 <- investigator_data %>%
  rename_columns(column_name_mapping) %>%
  mutate(
    institution = case_when(
      investigatorName == "Robert A. Kesterson" ~
        "Pennington Biomedical Research Center",
      .default = institution
    )
  )

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {investigator_table}"))
# synDelete(table_res)
synStore(Table(investigator_table, investigator_data_2))
#### FUNDER
# Synapse IDs of the source CSV and of the destination table.
funder_csv <- "syn51717779"
funder_table <- "syn26486830"

# Read the file at the path returned by synGet and drop the Component column.
funder_data <- read_csv(
  synGet(funder_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSV.
column_name_mapping <- c(
  "funderId" = "Funder_id",
  "funderName" = "Funder Name"
)

funder_data_2 <- funder_data %>%
  rename_columns(column_name_mapping)

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {funder_table}"))
# synDelete(table_res)
synStore(Table(funder_table, funder_data_2))
#### DEVELOPMENT
# Three source CSVs are read one after another; `development_csv` is
# reassigned before each read. Each file is read from the path returned by
# synGet and has its Component column dropped.
development_csv <- "syn51717837"
development_data_am <- read_csv(
  synGet(development_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

development_csv <- "syn51717844"
development_data_cl <- read_csv(
  synGet(development_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

development_csv <- "syn51717848"
development_data_gr <- read_csv(
  synGet(development_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Stack the three frames in the order gr, cl, am.
development_data <- bind_rows(
  development_data_gr,
  development_data_cl,
  development_data_am
)

# Synapse ID of the destination table.
development_table <- "syn26486807"

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSVs.
column_name_mapping <- c(
  "developmentId" = "Development_id",
  "resourceId" = "Resource_id",
  "investigatorId" = "Investigator_id",
  "publicationId" = "Publication_id",
  "funderId" = "Funder_id"
)

development_data_2 <- development_data %>%
  rename_columns(column_name_mapping)

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {development_table}"))
# synDelete(table_res)
synStore(Table(development_table, development_data_2))
#### DONOR
# Two source CSVs are read one after another; `donor_csv` is reassigned
# before the second read. Each file is read from the path returned by synGet
# and has its Component column dropped.
donor_csv <- "syn51717838"
donor_data_am <- read_csv(
  synGet(donor_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

donor_csv <- "syn51717845"
donor_data_cl <- read_csv(
  synGet(donor_csv)$path,
  col_types = "ccccccccccccccccc"
) %>%
  select(-Component)

# Stack the two frames in the order am, cl.
donor_data <- bind_rows(donor_data_am, donor_data_cl)

# Synapse ID of the destination table.
donor_table <- "syn26486829"

# Lookup used for renaming: names are the new column names, values are the
# headers found in the CSVs.
column_name_mapping <- c(
  "parentDonorId" = "Parent Donor_id",
  "donorId" = "Donor_id",
  "species" = "Species",
  "race" = "Race",
  "sex" = "Sex",
  "age" = "Age",
  "transplantationDonorId" = "Transplantation Donor_id"
)

# Rename, then recode specific values: the age "7 MO" becomes "0.583", and
# the two listed two-species strings are each replaced by a single species.
# All other values are kept as they are.
donor_data_2 <- donor_data %>%
  rename_columns(column_name_mapping) %>%
  mutate(
    age = case_when(
      age == "7 MO" ~ "0.583",
      .default = as.character(age)
    ),
    species = case_when(
      species == "Cricetulus griseus, Homo sapiens" ~ "Cricetulus griseus",
      species == "Homo sapiens, Mus musculus" ~ "Mus musculus",
      .default = as.character(species)
    )
  )

# Disabled: query all existing rows of the table and delete them.
# table_res <- synTableQuery(glue::glue("select * from {donor_table}"))
# synDelete(table_res)
synStore(Table(donor_table, donor_data_2))
## Delete rows in prod table
# Queries every row of syn26470542 and passes the query result to synDelete.
# NOTE(review): this runs unconditionally whenever the script is executed and
# targets a table that none of the sections above write to — confirm it is
# intended before running. The glue() call has no placeholders to fill.
table_res <- synTableQuery(glue::glue("select * from syn26470542"))
synDelete(table_res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment