Created
August 17, 2023 21:57
-
-
Save allaway/0edc92ae2ad6b4d40be21b6feffa2903 to your computer and use it in GitHub Desktop.
nftc name mapping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(synapser) | |
library(tidyverse) | |
synLogin() | |
#' Rename columns of a data frame from a lookup vector
#'
#' @param data_frame A data frame or tibble.
#' @param column_name_mapping Named character vector. Each name is the new
#'   column name; each value is the column's current name in `data_frame`.
#' @return `data_frame` with the listed columns renamed; columns that are not
#'   mentioned in the mapping are left as they are.
rename_columns <- function(data_frame, column_name_mapping) {
  # all_of() with a named vector renames "value" columns to their "name".
  rename(data_frame, all_of(column_name_mapping))
}
#' Convert a comma-separated text column into JSON array strings
#'
#' Every non-missing cell of `column_name` is split on commas, each piece is
#' trimmed of surrounding whitespace, and the pieces are serialised as a JSON
#' array of strings (for example "a, b" becomes '["a","b"]'). Missing cells
#' stay `NA`.
#'
#' @param data A data frame or tibble.
#' @param column_name Name of the column to convert, as a single string.
#' @return `data` with `column_name` replaced by a plain character column of
#'   JSON array strings.
#' @details Stops with an error when `column_name` is not a single non-empty
#'   string or is not a column of `data`.
convert_to_json_array <- function(data, column_name) {
  # Reject NA / vector / non-character names up front; `column_name == ""`
  # alone would fail with an unrelated message for those inputs.
  if (!is.character(column_name) || length(column_name) != 1L ||
        is.na(column_name)) {
    stop("The column name must be a single string.")
  }
  # Make sure the column name is non-empty
  if (column_name == "") {
    stop("The column name must be non-empty.")
  }
  # Check if the column exists
  if (!column_name %in% names(data)) {
    stop(paste("Column", column_name, "does not exist in the dataframe."))
  }

  to_json_array <- function(cell) {
    if (is.na(cell)) {
      return(NA_character_)
    }
    pieces <- str_trim(str_split(cell, ",", simplify = FALSE)[[1]])
    # Serialise a *list* of strings: with auto_unbox = TRUE a length-one
    # character vector would collapse to a bare string, whereas a one-element
    # list still yields a one-element array.
    # jsonlite is namespace-qualified because no library() call in this
    # script names it; as.character() drops the "json" class so map_chr()
    # receives a plain string.
    as.character(jsonlite::toJSON(as.list(pieces), auto_unbox = TRUE))
  }

  data %>%
    mutate("{column_name}" := map_chr(.data[[column_name]], to_json_array))
}
#### RESOURCES
# Download the resource CSV, rename its headers to the table's column names,
# JSON-encode the multi-valued columns, scale the date columns, apply a few
# positional row fixes, and store the result in the resource table.
resource_csv <- "syn51717834"
resource_table <- "syn26450069"

resource_data <- select(
  read_csv(synGet(resource_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  resourceId = "Resource_id",
  geneticReagentId = "Genetic Reagent_id",
  antibodyId = "Antibody_id",
  cellLineId = "Cell Line_id",
  animalModelId = "Animal Model_id",
  biobankId = "Biobank_id",
  rrid = "rrid",
  resourceName = "Resource Name",
  synonyms = "Synonyms",
  resourceType = "Resource Type",
  description = "Description",
  mTARequired = "MTA Required",
  usageRequirements = "Usage Requirements",
  dateAdded = "Date Added",
  dateModified = "Date Modified",
  howToAcquire = "How To Acquire"
)

resource_data_2 <- resource_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("synonyms") %>%
  convert_to_json_array("usageRequirements") %>%
  mutate(
    # NOTE(review): presumably epoch seconds -> milliseconds; confirm units.
    dateAdded = as.integer(dateAdded) * 1000,
    dateModified = as.integer(dateModified) * 1000
  )

# Manual override of the synonyms cell in row 78 (addressed by position, so
# it depends on the row order of the downloaded CSV).
resource_data_2$synonyms[78] <- '["ipNF95.11b C","ipNF95.11bC"]'

# Move three rows to the end of the table, one after another. Each index is
# applied to the row order left by the previous move, so the order of these
# statements matters.
resource_data_2 <- bind_rows(
  slice(resource_data_2, -50),
  slice(resource_data_2, 50)
)
resource_data_2 <- bind_rows(
  slice(resource_data_2, -543),
  slice(resource_data_2, 543)
)
resource_data_2 <- bind_rows(
  slice(resource_data_2, -986),
  slice(resource_data_2, 986)
)

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {resource_table}"))
# synDelete(table_res)
synStore(Table(resource_table, resource_data_2))
#### ANIMAL MODELS
# Download the animal-model CSV, rename its headers, JSON-encode the two
# multi-valued columns, and store the rows in the animal-model table.
am_csv <- "syn51717836"
am_table <- "syn26486808"

am_data <- select(
  read_csv(synGet(am_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  animalModelId = "Animal Model_id",
  donorId = "Donor_id",
  transplantationDonorId = "Transplantation Donor_id",
  animalState = "Animal State",
  backgroundStrain = "Background Strain",
  backgroundSubstrain = "Background Substrain",
  strainNomenclature = "Strain Nomenclature",
  generation = "Generation",
  transplantationType = "Transplantation Type",
  animalModelGeneticDisorder = "Animal Model Genetic Disorder",
  animalModelOfManifestation = "Animal Model Manifestation"
)

am_data_2 <- am_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("animalModelGeneticDisorder") %>%
  convert_to_json_array("animalModelOfManifestation")

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {am_table}"))
# synDelete(table_res)
synStore(Table(am_table, am_data_2))
#### CELL LINES
# Download the cell-line CSV, rename its headers, JSON-encode the two
# multi-valued columns, and store the rows in the cell-line table.
cl_csv <- "syn51717843"
cl_table <- "syn26486823"

cl_data <- select(
  read_csv(synGet(cl_csv)$path, col_types = "ccccccccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  cellLineId = "Cell Line_id",
  donorId = "Donor_id",
  organ = "Organ",
  tissue = "Tissue",
  cellLineCategory = "Cell Line Category",
  originYear = "Origin Year",
  strProfile = "Str Profile",
  populationDoublingTime = "Population Doubling Time",
  resistance = "Resistance",
  contaminatedMisidentified = "Contaminated Misidentified",
  cellLineGeneticDisorder = "Cell Line Disease",
  modelOfManifestation = "Cell Line Manifestation"
)

cl_data_2 <- cl_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("cellLineGeneticDisorder") %>%
  convert_to_json_array("modelOfManifestation")

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {cl_table}"))
# synDelete(table_res)
synStore(Table(cl_table, cl_data_2))
#### ANTIBODIES
# Download the antibody CSV, rename its headers, JSON-encode the
# reactive-species column, and store the rows in the antibody table.
ab_csv <- "syn51717831"
ab_table <- "syn26486811"

ab_data <- select(
  read_csv(synGet(ab_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  antibodyId = "Antibody_id",
  uniprotId = "uniprotId",
  targetAntigen = "Target Antigen",
  conjugate = "Conjugate",
  clonality = "Clonality",
  cloneId = "cloneId",
  reactiveSpecies = "Reactive Species",
  hostOrganism = "Host Organism"
)

ab_data_2 <- ab_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("reactiveSpecies")

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {ab_table}"))
# synDelete(table_res)
synStore(Table(ab_table, ab_data_2))
#### GENETIC REAGENTS
# Download the genetic-reagent CSV, rename its headers, JSON-encode the two
# multi-valued columns, and store the rows in the genetic-reagent table.
gr_csv <- "syn51717849"
gr_table <- "syn26486832"

# Read every column as character regardless of how many columns the file has.
# The compact all-"c" string used previously was shorter than the 26 columns
# renamed below, so it could not cover the trailing columns of this file.
gr_data <- synGet(gr_csv)$path %>%
  read_csv(col_types = cols(.default = col_character())) %>%
  select(-Component)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  geneticReagentId = "Genetic Reagent_id",
  insertName = "Insert Name",
  insertEntrezId = "Insert Entrez_id",
  gRNAshRNAsequence = "gRNA shRNA sequence",
  insertSize = "Insert Size",
  insertSpecies = "Insert Species",
  nTerminalTag = "nTerminal Tag",
  cTerminalTag = "cTerminal Tag",
  cloningMethod = "Cloning Method",
  "5primeCloningSite" = "5prime Cloning Site",
  "5primeSiteDestroyed" = "5prime Site Destroyed",
  # NOTE(review): "3prime Clonin gSite" looks like a typo but is kept as is;
  # rename() requires an exact match, so it presumably mirrors the header in
  # the source CSV. Confirm against the file before "correcting" it.
  "3primeCloningSite" = "3prime Clonin gSite",
  "3primeSiteDestroyed" = "3prime Site Destroyed",
  promoter = "Promoter",
  "5primer" = "5primer",
  "3primer" = "3primer",
  vectorBackbone = "Vector Backbone",
  vectorType = "Vector Type",
  backboneSize = "Backbone Size",
  totalSize = "Total Size",
  bacterialResistance = "Bacterial Resistance",
  selectableMarker = "Selectable Marker",
  copyNumber = "Copy Number",
  growthTemp = "Growth Temp",
  growthStrain = "Growth Strain",
  hazardous = "Hazardous"
)

gr_data_2 <- rename_columns(gr_data, column_name_mapping) %>%
  convert_to_json_array("insertSpecies") %>%
  convert_to_json_array("vectorType")

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {gr_table}"))
# synDelete(table_res)
synStore(Table(gr_table, gr_data_2))
#### BIOBANKS
# Download the biobank CSV, rename its headers, JSON-encode the six
# multi-valued columns, and store the rows in the biobank table.
bb_csv <- "syn51717842"
bb_table <- "syn26486821"

bb_data <- select(
  read_csv(synGet(bb_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  biobankId = "Biobank_id",
  resourceId = "Resource_id",
  diseaseType = "Disease Type",
  biobankURL = "Biobank URL",
  biobankName = "Biobank Name",
  specimenPreparationMethod = "Specimen Preparation Method",
  specimenType = "Specimen Type",
  tumorType = "Tumor Type",
  specimenFormat = "Specimen Format",
  specimenTissueType = "Specimen Tissue Type"
)

bb_data_2 <- bb_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("diseaseType") %>%
  convert_to_json_array("specimenPreparationMethod") %>%
  convert_to_json_array("specimenType") %>%
  convert_to_json_array("tumorType") %>%
  convert_to_json_array("specimenFormat") %>%
  convert_to_json_array("specimenTissueType")

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {bb_table}"))
# synDelete(table_res)
synStore(Table(bb_table, bb_data_2))
#### VENDOR
# Download the vendor CSV, rename its headers, and store the rows in the
# vendor table.
vendor_csv <- "syn51717784"
vendor_table <- "syn26486850"

vendor_data <- select(
  read_csv(synGet(vendor_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  vendorId = "Vendor_id",
  vendorUrl = "Vendor Url",
  vendorName = "Vendor Name"
)

vendor_data_2 <- vendor_data %>%
  rename_columns(column_name_mapping)

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {vendor_table}"))
# synDelete(table_res)
synStore(Table(vendor_table, vendor_data_2))
#### VENDORITEM
# Download the vendor-item CSV, rename its headers, and store the rows in the
# vendor-item table.
vendoritem_csv <- "syn51717835"
vendoritem_table <- "syn26486843"

vendoritem_data <- select(
  read_csv(synGet(vendoritem_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  vendorItemId = "Vendor Item_id",
  resourceId = "Resource_id",
  vendorId = "Vendor_id",
  catalogNumber = "Catalog Number",
  catalogNumberURL = "Catalog Number URL"
)

vendoritem_data_2 <- vendoritem_data %>%
  rename_columns(column_name_mapping)

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {vendoritem_table}"))
# synDelete(table_res)
synStore(Table(vendoritem_table, vendoritem_data_2))
#### USAGE
# Usage rows are spread over three CSVs (suffixes am / cl / ab). Each file is
# read the same way, the three are stacked, the headers are renamed, and the
# combined rows are stored in the usage table.
usage_table <- "syn26486841"

usage_csv <- "syn51717841"
usage_data_am <- select(
  read_csv(synGet(usage_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

usage_csv <- "syn51717847"
usage_data_cl <- select(
  read_csv(synGet(usage_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

usage_csv <- "syn51717833"
usage_data_ab <- select(
  read_csv(synGet(usage_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Stacking order: ab, then cl, then am
usage_data <- bind_rows(usage_data_ab, usage_data_cl, usage_data_am)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  usageId = "Usage_id",
  publicationId = "Publication_id",
  resourceId = "Resource_id"
)

usage_data_2 <- usage_data %>%
  rename_columns(column_name_mapping)

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {usage_table}"))
# synDelete(table_res)
synStore(Table(usage_table, usage_data_2))
#### RESOURCEAPPLICATION
# Download the resource-application CSV, rename its headers, JSON-encode the
# applications column, and store the rows in the resource-application table.
resourceapplication_csv <- "syn51717832"
resourceapplication_table <- "syn26486840"

resourceapplication_data <- select(
  read_csv(
    synGet(resourceapplication_csv)$path,
    col_types = "ccccccccccccccccc"
  ),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  resourceApplicationId = "Resource Application_id",
  resourceId = "Resource_id",
  source = "Source",
  links = "Links",
  applications = "Applications"
)

resourceapplication_data_2 <- resourceapplication_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("applications")

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {resourceapplication_table}"))
# synDelete(table_res)
synStore(Table(resourceapplication_table, resourceapplication_data_2))
#### PUBLICATION
# Download the publication CSV, rename its headers, JSON-encode the authors
# column, and store the rows in the publication table.
publication_csv <- "syn51717783"
publication_table <- "syn26486839"

publication_data <- select(
  read_csv(synGet(publication_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  publicationId = "Publication_id",
  doi = "doi",
  pmid = "pmid",
  abstract = "Abstract",
  journal = "Journal",
  publicationDate = "Publication Date",
  citation = "citation",
  publicationDateUnix = "Publication Date Unix",
  authors = "Authors",
  publicationTitle = "Publication Title"
)

publication_data_2 <- publication_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("authors")

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {publication_table}"))
# synDelete(table_res)
synStore(Table(publication_table, publication_data_2))
#### OBSERVATION
# Observation rows are spread over three CSVs (suffixes am / cl / gr). Each
# file is read the same way, the three are stacked, the headers are renamed,
# and the combined rows are stored in the observation table.
observation_table <- "syn26486836"

observation_csv <- "syn51717839"
observation_data_am <- select(
  read_csv(synGet(observation_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

observation_csv <- "syn51717846"
observation_data_cl <- select(
  read_csv(synGet(observation_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

observation_csv <- "syn51717850"
observation_data_gr <- select(
  read_csv(synGet(observation_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Stacking order: am, then gr, then cl
observation_data <- bind_rows(
  observation_data_am,
  observation_data_gr,
  observation_data_cl
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  observationId = "Observation_id",
  synapseId = "Synapse_id",
  resourceId = "Resource_id",
  easeofUseRating = "Ease of Use Rating",
  observationSubmitterName = "Observation Submitter Name",
  observationReference = "Observation Reference",
  observationText = "Observation Text",
  observationLink = "Observation Link",
  observationTime = "Observation Time",
  reliabilityRating = "Reliability Rating",
  observationType = "Observation Type",
  observationTimeUnits = "Observation Time Units",
  publicationId = "Publication_id"
)

observation_data_2 <- observation_data %>%
  rename_columns(column_name_mapping)

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {observation_table}"))
# synDelete(table_res)
synStore(Table(observation_table, observation_data_2))
#### MUTATION DETAILS
# Download the mutation-details CSV, rename its headers, JSON-encode the three
# multi-valued columns, and store the rows in the mutation-details table.
mutationdetails_csv <- "syn51717782"
mutationdetails_table <- "syn26486835"

mutationdetails_data <- select(
  read_csv(synGet(mutationdetails_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  mutationDetailsId = "Mutation Details_id",
  humanClinVarMutation = "Human ClinVar Mutation",
  alleleType = "Allele Type",
  affectedGeneSymbol = "Affected Gene Symbol",
  mutationMethod = "Mutation Method",
  externalMutationID = "External Mutation ID",
  affectedGeneName = "Affected Gene Name",
  sequenceVariation = "Sequence Variation",
  chromosome = "Chromosome",
  proteinVariation = "Protein Variation",
  animalModelMutation = "Animal Model Mutation",
  mutationType = "Mutation Type"
)

mutationdetails_data_2 <- mutationdetails_data %>%
  rename_columns(column_name_mapping) %>%
  convert_to_json_array("alleleType") %>%
  convert_to_json_array("mutationType") %>%
  convert_to_json_array("mutationMethod")

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {mutationdetails_table}"))
# synDelete(table_res)
synStore(Table(mutationdetails_table, mutationdetails_data_2))
#### MUTATION
# Download the mutation CSV, rename its headers, and store the rows in the
# mutation table.
mutation_csv <- "syn51717781"
mutation_table <- "syn26486834"

mutation_data <- select(
  read_csv(synGet(mutation_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  mutationId = "Mutation_id",
  mutationDetailsId = "Mutation Details_id",
  animalModelId = "Animal Model_id",
  cellLineId = "Cell Line_id"
)

mutation_data_2 <- mutation_data %>%
  rename_columns(column_name_mapping)

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {mutation_table}"))
# synDelete(table_res)
synStore(Table(mutation_table, mutation_data_2))
#### INVESTIGATOR
# Download the investigator CSV, rename its headers, override one
# investigator's institution, and store the rows in the investigator table.
investigator_csv <- "syn51717780"
investigator_data <- synGet(investigator_csv)$path %>%
  read_csv(col_types = "ccccccccccccccccc") %>%
  select(-Component)
investigator_table <- "syn26486833"

# Define the mapping between the new column names and the old ones
column_name_mapping <- c(
  "investigatorId" = "Investigator_id",
  "investigatorSynapseId" = "Investigator SynapseId",
  "orcid" = "orcid",
  "institution" = "Institution",
  "investigatorWebsite" = "Investigator Website",
  "investigatorName" = "Investigator Name"
)

investigator_data_2 <- rename_columns(investigator_data, column_name_mapping) %>%
  mutate(
    institution = case_when(
      investigatorName == "Robert A. Kesterson" ~
        "Pennington Biomedical Research Center",
      # Every other row keeps its institution, including rows whose
      # investigatorName is NA: an explicit `!=` branch evaluates to NA for
      # those rows and would blank out their institution.
      .default = institution
    )
  )

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {investigator_table}"))
# synDelete(table_res)
synStore(Table(investigator_table, investigator_data_2))
#### FUNDER
# Download the funder CSV, rename its headers, and store the rows in the
# funder table.
funder_csv <- "syn51717779"
funder_table <- "syn26486830"

funder_data <- select(
  read_csv(synGet(funder_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  funderId = "Funder_id",
  funderName = "Funder Name"
)

funder_data_2 <- funder_data %>%
  rename_columns(column_name_mapping)

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {funder_table}"))
# synDelete(table_res)
synStore(Table(funder_table, funder_data_2))
#### DEVELOPMENT
# Development rows are spread over three CSVs (suffixes am / cl / gr). Each
# file is read the same way, the three are stacked, the headers are renamed,
# and the combined rows are stored in the development table.
development_table <- "syn26486807"

development_csv <- "syn51717837"
development_data_am <- select(
  read_csv(synGet(development_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

development_csv <- "syn51717844"
development_data_cl <- select(
  read_csv(synGet(development_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

development_csv <- "syn51717848"
development_data_gr <- select(
  read_csv(synGet(development_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Stacking order: gr, then cl, then am
development_data <- bind_rows(
  development_data_gr,
  development_data_cl,
  development_data_am
)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  developmentId = "Development_id",
  resourceId = "Resource_id",
  investigatorId = "Investigator_id",
  publicationId = "Publication_id",
  funderId = "Funder_id"
)

development_data_2 <- development_data %>%
  rename_columns(column_name_mapping)

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {development_table}"))
# synDelete(table_res)
synStore(Table(development_table, development_data_2))
#### DONOR
# Donor rows are spread over two CSVs (suffixes am / cl). Both are read the
# same way and stacked, the headers are renamed, a few age/species values are
# recoded, and the combined rows are stored in the donor table.
donor_table <- "syn26486829"

donor_csv <- "syn51717838"
donor_data_am <- select(
  read_csv(synGet(donor_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

donor_csv <- "syn51717845"
donor_data_cl <- select(
  read_csv(synGet(donor_csv)$path, col_types = "ccccccccccccccccc"),
  -Component
)

# Stacking order: am, then cl
donor_data <- bind_rows(donor_data_am, donor_data_cl)

# Lookup: new column name = header as it appears in the CSV
column_name_mapping <- c(
  parentDonorId = "Parent Donor_id",
  donorId = "Donor_id",
  species = "Species",
  race = "Race",
  sex = "Sex",
  age = "Age",
  transplantationDonorId = "Transplantation Donor_id"
)

donor_data_2 <- donor_data %>%
  rename_columns(column_name_mapping) %>%
  mutate(
    # "7 MO" is replaced by "0.583" (presumably 7 months expressed in years);
    # every other age is kept as text.
    age = case_when(
      age == "7 MO" ~ "0.583",
      .default = as.character(age)
    ),
    # The two listed two-species values are collapsed to a single species;
    # every other value is kept as text.
    species = case_when(
      species == "Cricetulus griseus, Homo sapiens" ~ "Cricetulus griseus",
      species == "Homo sapiens, Mus musculus" ~ "Mus musculus",
      .default = as.character(species)
    )
  )

# To clear the table's existing rows before storing, uncomment:
# table_res <- synTableQuery(glue::glue("select * from {donor_table}"))
# synDelete(table_res)
synStore(Table(donor_table, donor_data_2))
## Delete rows in prod table
# Query every row of syn26470542 and pass the query result to synDelete().
# NOTE(review): this runs unconditionally at the end of the script; confirm
# that wiping this table on every run is intended.
table_res <- "select * from syn26470542" %>%
  glue::glue() %>%
  synTableQuery()
synDelete(table_res)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment