Last active
April 9, 2020 18:00
-
-
Save cgpu/79b0ce2591e35c8359de756c1f21274d to your computer and use it in GitHub Desktop.
Inspect duplicate ids in SAMPID column
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inspect duplicate ids in the SAMPID column ----
#
# Joins the SRA run table with the GTEx phenotype table on SAMPID and then
# lists every id that occurs more than once in the joined result.
# All input files are read from the current working directory.

# Phenotype table (per the original note: obtained from yarn::downloadGtexV8)
pData <- read.csv("pData_yarn_downloadGTExV8.csv")

# Run accessions that were analysed (per the original note: with rMATS).
# The file has no header row; t() + as.vector() flattens whatever it contains
# into a single plain vector.
samples_sra <- as.vector(t(read.csv("samples.csv", header = FALSE)))

# SRA run table, restricted to the runs listed in samples.csv
meta.data <- read.csv(
  "SraRunTable.noCram.noExome.noWGS.totalRNA.txt",
  header = TRUE
)
meta.data <- meta.data[meta.data$Run %in% samples_sra, ]
dim(meta.data)

# Reformat the GTEx ids in meta.data$biospecimen_repository_sample_id so they
# match pData$SAMPID: every "-" becomes "." (literal replacement, no regex).
meta.data$SAMPID <- gsub(
  "-", ".",
  meta.data$biospecimen_repository_sample_id,
  fixed = TRUE
)
dim(meta.data)
# Output recorded by the original author:
#[1] 8673 80

# Merge the two metadata tables on SAMPID. all = FALSE keeps only ids present
# in both tables; an id repeated on either side yields one row per matching
# pair, which is why uniqueness is inspected below.
merged <- merge(meta.data, pData, by = "SAMPID", all = FALSE)
dim(merged)

# Isolate the two id columns
id <- merged[, c("Run", "SAMPID")]

# Inspect uniqueness of the ids in columns SAMPID and Run
janitor::get_dupes(id, SAMPID)
janitor::get_dupes(id, Run)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment