# Retrieve the indexed clinical data of every TCGA project and save it as a
# single CSV file.

library(TCGAbiolinks)

library(data.table)

# All GDC project ids, restricted to those containing "TCGA", sorted
# alphabetically so the projects are processed in a stable order.
tcga_projects <- sort(
  grep("TCGA", TCGAbiolinks::getGDCprojects()$project_id, value = TRUE)
)

# One GDCquery_clinic() call per project, with a text progress bar.
clinical_by_project <- plyr::alply(
  tcga_projects, 1, GDCquery_clinic,
  .progress = "text"
)

# fill = TRUE pads columns that a project does not provide with NA instead of
# failing when the per-project tables do not share the same columns.
clinical <- rbindlist(clinical_by_project, fill = TRUE)

# The output file is passed positionally: the argument was renamed from
# `path` to `file` in readr 1.4.0, and the position is the same in both.
readr::write_csv(clinical, "all_clin_indexed.csv")

# This code will get all clinical XML data from TCGA

# Retrieve the clinical XML data of one project as a single merged table.
#
# Queries and downloads the "Clinical" data category of `proj`, parses the
# "patient" section and merges every other available section onto it by
# `bcr_patient_barcode`. The merged table is also written to
# "<proj>_clinical_from_XML.csv" in the working directory.
#
# Arguments:
#   proj       GDC project id (e.g. "TCGA-ACC").
#   max_tries  Number of attempts before giving up with an error. The default,
#              Inf, keeps retrying until an attempt succeeds.
#
# Returns the merged clinical table.
getclinical <- function(proj, max_tries = Inf) {
  message(proj)

  sections <- c("admin", "radiation", "follow_up", "drug", "new_tumor_event")
  attempt <- 0

  repeat {
    attempt <- attempt + 1

    # Any failure (query, download, parsing, writing) is captured as a
    # condition object so that the whole attempt can be retried.
    result <- tryCatch({
      query <- GDCquery(project = proj, data.category = "Clinical")
      GDCdownload(query)
      clinical <- GDCprepare_clinic(query, clinical.info = "patient")

      for (i in sections) {
        message(i)
        aux <- GDCprepare_clinic(query, clinical.info = i)
        if (is.null(aux)) next

        # Add the section name as a suffix to columns that already exist in
        # the merged table. The mask is computed over the full column vector
        # so the positions stay aligned with colnames(aux) wherever the
        # barcode column sits; the merge key itself is never renamed.
        aux_cols <- colnames(aux)
        replicated <- !grepl("bcr_patient_barcode", aux_cols) &
          aux_cols %in% colnames(clinical)
        colnames(aux)[replicated] <- paste0(aux_cols[replicated], ".", i)

        # all = TRUE keeps patients that appear on only one side.
        clinical <- merge(clinical, aux, by = "bcr_patient_barcode", all = TRUE)
      }

      # Save the clinical data into a csv file
      readr::write_csv(clinical, paste0(proj, "_clinical_from_XML.csv"))
      clinical
    }, error = function(e) e)

    if (!inherits(result, "error")) {
      return(result)
    }

    # Report why the attempt failed instead of discarding the error.
    message(paste0("Error clinical: ", proj, " (", conditionMessage(result), ")"))

    if (attempt >= max_tries) {
      stop(
        "Could not retrieve clinical data for ", proj, " after ", attempt,
        " attempt(s): ", conditionMessage(result),
        call. = FALSE
      )
    }
  }
}

# Retrieve the clinical XML data of every TCGA project with getclinical() and
# save the combined table as a single CSV file.

# All GDC project ids, restricted to those containing "TCGA", sorted
# alphabetically so the projects are processed in a stable order.
tcga_projects <- sort(
  grep("TCGA", TCGAbiolinks::getGDCprojects()$project_id, value = TRUE)
)

# One getclinical() call per project, with a text progress bar.
clinical_by_project <- plyr::alply(
  tcga_projects, 1, getclinical,
  .progress = "text"
)

# fill = TRUE pads columns missing from a project with NA; setDF() then
# converts the combined data.table to a plain data.frame.
clinical <- setDF(rbindlist(clinical_by_project, fill = TRUE))

# The output file is passed positionally: the argument was renamed from
# `path` to `file` in readr 1.4.0, and the position is the same in both.
readr::write_csv(clinical, "all_clin_XML.csv")

# Get all batch numbers for each patient

library(TCGAbiolinks)

# Retrieve the "admin" section of the biospecimen XML data of one project.
#
# Queries and downloads the "Biospecimen" data category of `proj`, parses its
# "admin" section and writes the result to "<proj>_batch_from_XML.csv" in the
# working directory.
#
# Arguments:
#   proj       GDC project id (e.g. "TCGA-ACC").
#   max_tries  Number of attempts before giving up with an error. The default,
#              Inf, keeps retrying until an attempt succeeds.
#
# Returns the parsed "admin" table.
getBatch <- function(proj, max_tries = Inf) {
  message(proj)

  attempt <- 0

  repeat {
    attempt <- attempt + 1

    # Any failure (query, download, parsing, writing) is captured as a
    # condition object so that the whole attempt can be retried.
    result <- tryCatch({
      query <- GDCquery(project = proj, data.category = "Biospecimen")
      GDCdownload(query)
      batch <- GDCprepare_clinic(query, clinical.info = "admin")

      # Save the batch data into a csv file
      readr::write_csv(batch, paste0(proj, "_batch_from_XML.csv"))
      batch
    }, error = function(e) e)

    if (!inherits(result, "error")) {
      return(result)
    }

    # Report why the attempt failed instead of discarding the error.
    message(paste0("Error biospecimen: ", proj, " (", conditionMessage(result), ")"))

    if (attempt >= max_tries) {
      stop(
        "Could not retrieve biospecimen data for ", proj, " after ", attempt,
        " attempt(s): ", conditionMessage(result),
        call. = FALSE
      )
    }
  }
}

# Retrieve the biospecimen "admin" data of every TCGA project with getBatch()
# and save the combined table as a single CSV file.

# All GDC project ids, restricted to those containing "TCGA", sorted
# alphabetically so the projects are processed in a stable order.
tcga_projects <- sort(
  grep("TCGA", TCGAbiolinks::getGDCprojects()$project_id, value = TRUE)
)

# One getBatch() call per project, with a text progress bar.
biospecimen_by_project <- plyr::alply(
  tcga_projects, 1, getBatch,
  .progress = "text"
)

# fill = TRUE pads columns missing from a project with NA; setDF() then
# converts the combined data.table to a plain data.frame.
biospecimen <- setDF(rbindlist(biospecimen_by_project, fill = TRUE))

# The output file is passed positionally: the argument was renamed from
# `path` to `file` in readr 1.4.0, and the position is the same in both.
readr::write_csv(biospecimen, "biospecimen_from_XML.csv")