Skip to content

Instantly share code, notes, and snippets.

@JakeConway
Last active December 27, 2016 06:13
Show Gist options
  • Save JakeConway/d875a8504ec67a0719fd to your computer and use it in GitHub Desktop.
Save JakeConway/d875a8504ec67a0719fd to your computer and use it in GitHub Desktop.
# Load the three ICGC sample sets. Each "Set N" directory is read in full and
# its files are stacked into one data frame (set1, set2, set3).

# Read every entry of `dir` as a tab-separated table with a header row and
# row-bind the results.
#
# dir: path of the directory whose files should be read.
# Returns one data frame containing the rows of all files, with Mutation.ID
# converted to character in each file before binding. Returns an empty
# data.frame() when `dir` contains no files.
read_mutation_set <- function(dir) {
  # full.names = TRUE yields usable paths, so the working directory does not
  # have to be changed in order to read the files.
  files <- list.files(dir, full.names = TRUE)
  if (length(files) == 0) {
    return(data.frame())
  }
  tables <- lapply(files, function(path) {
    tbl <- read.table(path, header = TRUE, sep = "\t")
    tbl$Mutation.ID <- as.character(tbl$Mutation.ID)
    tbl
  })
  # Bind once at the end instead of growing a data frame inside a loop, which
  # copies the accumulated rows on every iteration.
  do.call(rbind, tables)
}

sample_data_dir <- "/Users/jakeconway/Desktop/icgcSampleData"
set1 <- read_mutation_set(file.path(sample_data_dir, "Set 1"))
set2 <- read_mutation_set(file.path(sample_data_dir, "Set 2"))
set3 <- read_mutation_set(file.path(sample_data_dir, "Set 3"))
# Keep the mutation IDs of each set before the per-set frames are merged and
# removed; they are used below to fill the membership columns.
set1IDs <- set1[["Mutation.ID"]]
set2IDs <- set2[["Mutation.ID"]]
set3IDs <- set3[["Mutation.ID"]]

# Stack the three sets into a single table, then drop the per-set frames.
# NOTE(review): an ID that occurs in more than one set (or more than once in a
# set) keeps one row per occurrence here - confirm duplicates are intended.
data <- rbind(set1, set2, set3)
rm(set1, set2, set3)

# Append one numeric indicator column per set: 1 when the row's Mutation.ID
# occurs in that set, 0 otherwise.
data$Set1 <- as.numeric(data$Mutation.ID %in% set1IDs)
data$Set2 <- as.numeric(data$Mutation.ID %in% set2IDs)
data$Set3 <- as.numeric(data$Mutation.ID %in% set3IDs)
# Derive additional columns from the raw text fields of `data`.

# Genomic.DNA.Change is split on the literal marker ":g." (fixed = TRUE, so
# the "." is not treated as a regex wildcard). The part before the marker is
# stored as `chromosome`; the part after it, with every run of digits removed,
# is stored as `specific.substitution`. The source column is then dropped.
genomicData <- strsplit(as.character(data$Genomic.DNA.Change), ":g.",
                        fixed = TRUE)
# vapply() guarantees a character vector with one entry per row, including
# when `data` has zero rows (unlist(lapply(...)) would give NULL there).
chromosome <- vapply(genomicData, function(x) x[1], character(1))
specificSub <- gsub("[[:digit:]]+", "",
                    vapply(genomicData, function(x) x[2], character(1)))
data$chromosome <- chromosome
data$specific.substitution <- specificSub
rm(genomicData)
data$Genomic.DNA.Change <- NULL

# Consequences is split on ": ". The text before the separator becomes
# `specific.mutation.type`; the first space-delimited token after it becomes
# `gene` (NA when there is nothing after the separator).
cons <- as.character(data$Consequences)
typeAndGene <- strsplit(cons, ": ", fixed = TRUE)
mutationType <- vapply(typeAndGene, function(x) x[1], character(1))
gene <- vapply(
  typeAndGene,
  function(x) strsplit(x[2], " ", fixed = TRUE)[[1]][1],
  character(1)
)
data$specific.mutation.type <- mutationType
data$gene <- gene

# Donors.Affected is split on "/"; the second piece is stored as the integer
# column `total.donors` (NA when it is missing or not parseable as an integer).
totalDonors <- vapply(
  strsplit(as.character(data$Donors.Affected), "/", fixed = TRUE),
  function(x) x[2],
  character(1)
)
data$total.donors <- as.integer(totalDonors)

# Store the remaining text columns as plain character vectors.
data$Consequences <- as.character(data$Consequences)
data$Type <- as.character(data$Type)
data$Donors.Affected <- as.character(data$Donors.Affected)
data$Projects.Mutation.Observed <- as.character(data$Projects.Mutation.Observed)
# Write the combined table as a tab-separated file, plus a JSON file that
# describes its layout. Both files go to `output_dir`; explicit paths are used
# so the working directory does not have to be changed.
output_dir <- "/Users/jakeconway/Desktop"

# row.names = FALSE: by default write.table() prepends a row-name field to
# every data row but not to the header line, which would misalign the header
# with the data and shift every column by one relative to the indices
# declared in the description below.
write.table(data, file = file.path(output_dir, "icgcData.txt"), sep = "\t",
            row.names = FALSE)

# Description of the written table.
# NOTE(review): the `index` values look like 0-based column positions in
# icgcData.txt (sets in columns 5-7). That only holds if the input files
# provide Mutation.ID, Genomic.DNA.Change, Type, Consequences,
# Donors.Affected and Projects.Mutation.Observed in that order - confirm.
jsonData <- list(
  file = "/Users/jakeconway/Desktop",
  name = "icgcData.txt",
  header = 0,
  separator = "\t",
  skip = 0,
  meta = list(
    c(type = "id", index = 0, name = "Mutation.ID"),
    c(type = "character", index = 1, name = "Type"),
    c(type = "character", index = 2, name = "Consequences"),
    c(type = "character", index = 3, name = "Donors.Affected"),
    c(type = "character", index = 4, name = "Projects.Mutation.Observed"),
    c(type = "character", index = 8, name = "chromosome"),
    c(type = "character", index = 9, name = "specific.substitution"),
    c(type = "character", index = 10, name = "specific.mutation.type"),
    c(type = "character", index = 11, name = "gene"),
    c(type = "integer", index = 12, name = "total.donors")
  ),
  sets = c(format = "binary", start = 5, end = 7)
)

# NOTE(review): toJSON() is not a base R function and no library() call is
# visible in this script; a JSON package providing it must already be
# attached before this line runs - confirm which one is intended.
jsonData <- toJSON(jsonData)
write(jsonData, file.path(output_dir, "icgcData.json"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment