Skip to content

Instantly share code, notes, and snippets.

@sahilseth
Created March 23, 2017 17:47
Show Gist options
  • Save sahilseth/3625dd6e9129bc1b1d46260c85f1e3b0 to your computer and use it in GitHub Desktop.
Save sahilseth/3625dd6e9129bc1b1d46260c85f1e3b0 to your computer and use it in GitHub Desktop.
a function to split TCGA barcodes
#' Split TCGA barcodes into their component fields
#'
#' Each barcode is split on `split.char` and the fields are picked out by
#' position: field 2 is the tissue source site, field 3 the participant, the
#' first two characters of field 4 the sample type, and field 5 holds the
#' portion digits followed by the analyte letter(s). The analyte is mapped to
#' a nucleic acid type: D, G, W, X give "DNA"; H, R, T give "RNA"; anything
#' else (including a missing analyte) gives the string "NA".
#'
#' Positional fields that a barcode does not have come back as `NA`.
#'
#' @param x Character vector (or factor) of barcodes, e.g.
#'   `"TCGA-02-0001-01C-01D-0182-01"`.
#' @param split.char Field separator. It is handed to `strsplit()` as a
#'   regular expression, so metacharacters must be escaped (use `"\\."` to
#'   split on a literal dot).
#' @return A named list with elements `sampleid` (the input `x`, untouched),
#'   `tissuesourcesite`, `participant`, `sampletype`, `programcode` (always
#'   the string "NA"; it is not parsed from the barcode), `nucleicacidtype`,
#'   `portion`, `portionanalyte` and `error` (always 0; no validation is
#'   performed). Every element other than `sampleid` has one entry per
#'   barcode in `x`.
#' @examples
#' split_tcga_barcode("TCGA-02-0001-01C-01D-0182-01")
split_tcga_barcode <- function(x, split.char = "-") {
  # strsplit() rejects factors, so parse a character copy; the caller's
  # original `x` is still what gets returned as `sampleid`.
  barcodes <- as.character(x)
  n_barcodes <- length(barcodes)
  fields <- strsplit(barcodes, split.char)

  # Field `i` of every barcode; NA where a barcode has fewer than `i` fields.
  nth_field <- function(pieces, i) {
    vapply(pieces, function(p) p[i], character(1), USE.NAMES = FALSE)
  }

  tss <- nth_field(fields, 2L)
  participant <- nth_field(fields, 3L)
  sample_type <- substr(nth_field(fields, 4L), 1L, 2L)

  # Put a "!" between the leading digits (portion) and the upper-case letters
  # that follow (analyte), then split on it: "01D" -> "01", "D".
  marked <- gsub("([0-9]*)([A-Z]*)", "\\1!\\2", nth_field(fields, 5L))
  portion_parts <- strsplit(marked, "!")
  portion <- nth_field(portion_parts, 1L)
  analyte <- nth_field(portion_parts, 2L)

  # %in% never yields NA, so a missing analyte simply keeps the "NA" default.
  nucleic <- rep("NA", n_barcodes)
  nucleic[analyte %in% c("D", "G", "W", "X")] <- "DNA"
  nucleic[analyte %in% c("H", "R", "T")] <- "RNA"

  list(
    sampleid = x,
    tissuesourcesite = tss,
    participant = participant,
    sampletype = sample_type,
    programcode = rep("NA", n_barcodes),
    nucleicacidtype = nucleic,
    portion = portion,
    portionanalyte = analyte,
    error = rep(0, n_barcodes)
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment