JakeConway · December 27, 2016 06:13
diff --git a/icgcToUpSetR.R b/icgcToUpSetR.R
 # Read every file in `dir` as a tab-separated table with a header row and
 # stack the rows into one data frame.
 #
 # dir: path of the directory that holds the files of one set.
 # Returns the row-bound data frame with Mutation.ID coerced to character,
 # or an empty data.frame() when the directory lists no files.
 readSetFiles <- function(dir) {
   paths <- list.files(dir, full.names = TRUE)
   if (length(paths) == 0) {
     return(data.frame())
   }
   tables <- lapply(paths, function(path) {
     tbl <- read.table(path, header = TRUE, sep = "\t")
     # Force the IDs to character regardless of how read.table typed them.
     tbl$Mutation.ID <- as.character(tbl$Mutation.ID)
     tbl
   })
   # A single rbind over the list avoids re-copying the frame once per file.
   do.call("rbind", tables)
 }

 # Files are addressed by full path, so the working directory is not changed.
 set1 <- readSetFiles("/Users/jakeconway/Desktop/icgcSampleData/Set 1")
 set2 <- readSetFiles("/Users/jakeconway/Desktop/icgcSampleData/Set 2")
 set3 <- readSetFiles("/Users/jakeconway/Desktop/icgcSampleData/Set 3")
 # Remember which mutation IDs each set contained before the sets are merged.
 set1IDs <- set1$Mutation.ID
 set2IDs <- set2$Mutation.ID
 set3IDs <- set3$Mutation.ID

 # Stack the three sets into one table; the per-set frames are then dropped.
 data <- rbind(set1, set2, set3)
 rm(set1, set2, set3)

 # A mutation found in several sets (or files) is stacked once per occurrence.
 # Keep only its first row so every ID appears once and is not counted
 # repeatedly in each set combination it belongs to.
 data <- data[!duplicated(data$Mutation.ID), , drop = FALSE]
 rownames(data) <- NULL

 # Binary membership columns (1 = ID present in that set, 0 = absent),
 # appended after the existing columns in the order Set1, Set2, Set3.
 data$Set1 <- as.numeric(data$Mutation.ID %in% set1IDs)
 data$Set2 <- as.numeric(data$Mutation.ID %in% set2IDs)
 data$Set3 <- as.numeric(data$Mutation.ID %in% set3IDs)

 # Split each Genomic.DNA.Change value at the literal text ":g.".
 # fixed = TRUE stops the "." from acting as a regex wildcard.
 genomicData <- strsplit(as.character(data$Genomic.DNA.Change), ":g.",
                         fixed = TRUE)
 # Piece before ":g." -> chromosome. vapply always returns one string per row
 # (NA when the piece is missing), so the column is created even for zero rows.
 chromosome <- vapply(genomicData, function(x) x[1], character(1))
 # Piece after ":g." with every run of digits removed -> specific.substitution.
 specificSub <- vapply(
   genomicData,
   function(x) gsub("[[:digit:]]+", "", x[2]),
   character(1)
 )
 data$chromosome <- chromosome
 data$specific.substitution <- specificSub
 # The source column has been split into the two columns above; drop it.
 rm(genomicData); data$Genomic.DNA.Change <- NULL

 # Split each Consequences string at ": ".
 cons <- as.character(data$Consequences)
 typeAndGene <- strsplit(cons, ": ")
 # First piece of each split -> specific.mutation.type.
 mutationType <- unlist(lapply(typeAndGene, `[`, 1))
 # First space-delimited token of the second piece -> gene.
 gene <- unlist(lapply(typeAndGene, function(parts) {
   afterColon <- parts[2]
   strsplit(afterColon, " ")[[1]][1]
 }))
 data$gene <- gene
 data$specific.mutation.type <- mutationType
 # Keep the new columns in the original order: mutation type, then gene.
 data <- data[c(setdiff(names(data), "gene"), "gene")]

 # Donors.Affected is split at "/"; the second piece is stored as an integer
 # in total.donors.
 donorParts <- strsplit(as.character(data$Donors.Affected), "/")
 totalDonors <- unlist(lapply(donorParts, `[`, 2))
 data$total.donors <- as.integer(totalDonors)

 # Store the descriptive columns as plain character vectors.
 for (column in c("Consequences", "Type", "Donors.Affected",
                  "Projects.Mutation.Observed")) {
   data[[column]] <- as.character(data[[column]])
 }

 # Write the combined table as tab-separated text on the Desktop. The working
 # directory is switched because the output file names are relative.
 setwd("/Users/jakeconway/Desktop")
 # row.names = FALSE: the default would prepend an unnamed row-name column,
 # giving data rows one more field than the header and shifting every column
 # one position to the right.
 write.table(data, file = "icgcData.txt", sep = "\t", row.names = FALSE)

 # Description of icgcData.txt: one meta entry per attribute column (type,
 # 0-based column index, name) and the column range of the binary set columns.
 # Entries are built with list() rather than c(): c() coerces mixed values to
 # character, which would emit index/start/end as strings ("0") not numbers.
 # NOTE(review): toJSON is not defined or attached in this script; it is
 # presumably provided by a JSON package loaded elsewhere (e.g. rjson) - confirm.
 # NOTE(review): `file` holds the directory and `name` the file name; confirm
 # this is what the consumer of the JSON expects.
 jsonData <- list(
   file = "/Users/jakeconway/Desktop",
   name = "icgcData.txt",
   header = 0,
   separator = "\t",
   skip = 0,
   meta = list(
     list(type = "id", index = 0, name = "Mutation.ID"),
     list(type = "character", index = 1, name = "Type"),
     list(type = "character", index = 2, name = "Consequences"),
     list(type = "character", index = 3, name = "Donors.Affected"),
     list(type = "character", index = 4, name = "Projects.Mutation.Observed"),
     list(type = "character", index = 8, name = "chromosome"),
     list(type = "character", index = 9, name = "specific.substitution"),
     list(type = "character", index = 10, name = "specific.mutation.type"),
     list(type = "character", index = 11, name = "gene"),
     list(type = "integer", index = 12, name = "total.donors")
   ),
   # Columns 5-7 are skipped by the meta indexes above and declared here as
   # the binary set columns.
   sets = list(format = "binary", start = 5, end = 7)
 )

 # Serialize the description and write it to the current working directory.
 jsonData <- toJSON(jsonData)
 write(jsonData, "icgcData.json")
	# Read every file in `dir` as a tab-separated table with a header row and
	# stack the rows into one data frame.
	#
	# dir: path of the directory that holds the files of one set.
	# Returns the row-bound data frame with Mutation.ID coerced to character,
	# or an empty data.frame() when the directory lists no files.
	readSetFiles <- function(dir) {
		paths <- list.files(dir, full.names = TRUE)
		if (length(paths) == 0) {
			return(data.frame())
		}
		tables <- lapply(paths, function(path) {
			tbl <- read.table(path, header = TRUE, sep = "\t")
			# Force the IDs to character regardless of how read.table typed them.
			tbl$Mutation.ID <- as.character(tbl$Mutation.ID)
			tbl
		})
		# A single rbind over the list avoids re-copying the frame once per file.
		do.call("rbind", tables)
	}

	# Files are addressed by full path, so the working directory is not changed.
	set1 <- readSetFiles("/Users/jakeconway/Desktop/icgcSampleData/Set 1")
	set2 <- readSetFiles("/Users/jakeconway/Desktop/icgcSampleData/Set 2")
	set3 <- readSetFiles("/Users/jakeconway/Desktop/icgcSampleData/Set 3")
	# Remember which mutation IDs each set contained before the sets are merged.
	set1IDs <- set1$Mutation.ID
	set2IDs <- set2$Mutation.ID
	set3IDs <- set3$Mutation.ID

	# Stack the three sets into one table; the per-set frames are then dropped.
	data <- rbind(set1, set2, set3)
	rm(set1, set2, set3)

	# A mutation found in several sets (or files) is stacked once per occurrence.
	# Keep only its first row so every ID appears once and is not counted
	# repeatedly in each set combination it belongs to.
	data <- data[!duplicated(data$Mutation.ID), , drop = FALSE]
	rownames(data) <- NULL

	# Binary membership columns (1 = ID present in that set, 0 = absent),
	# appended after the existing columns in the order Set1, Set2, Set3.
	data$Set1 <- as.numeric(data$Mutation.ID %in% set1IDs)
	data$Set2 <- as.numeric(data$Mutation.ID %in% set2IDs)
	data$Set3 <- as.numeric(data$Mutation.ID %in% set3IDs)

	# Split each Genomic.DNA.Change value at the literal text ":g.".
	# fixed = TRUE stops the "." from acting as a regex wildcard.
	genomicData <- strsplit(as.character(data$Genomic.DNA.Change), ":g.",
		fixed = TRUE)
	# Piece before ":g." -> chromosome. vapply always returns one string per row
	# (NA when the piece is missing), so the column is created even for zero rows.
	chromosome <- vapply(genomicData, function(x) x[1], character(1))
	# Piece after ":g." with every run of digits removed -> specific.substitution.
	specificSub <- vapply(
		genomicData,
		function(x) gsub("[[:digit:]]+", "", x[2]),
		character(1)
	)
	data$chromosome <- chromosome
	data$specific.substitution <- specificSub
	# The source column has been split into the two columns above; drop it.
	rm(genomicData); data$Genomic.DNA.Change <- NULL

	# Split each Consequences string at ": ".
	cons <- as.character(data$Consequences)
	typeAndGene <- strsplit(cons, ": ")
	# First piece of each split -> specific.mutation.type.
	mutationType <- unlist(lapply(typeAndGene, `[`, 1))
	data$specific.mutation.type <- mutationType
	# First space-delimited token of the second piece -> gene.
	gene <- unlist(lapply(typeAndGene, function(parts) {
		afterColon <- parts[2]
		strsplit(afterColon, " ")[[1]][1]
	}))
	data$gene <- gene

	# Donors.Affected is split at "/"; the second piece is stored as an integer
	# in total.donors.
	donorParts <- strsplit(as.character(data$Donors.Affected), "/")
	totalDonors <- unlist(lapply(donorParts, `[`, 2))
	data$total.donors <- as.integer(totalDonors)

	# Store the descriptive columns as plain character vectors.
	for (column in c("Consequences", "Type", "Donors.Affected",
		"Projects.Mutation.Observed")) {
		data[[column]] <- as.character(data[[column]])
	}

	# Write the combined table as tab-separated text on the Desktop. The working
	# directory is switched because the output file names are relative.
	setwd("/Users/jakeconway/Desktop")
	# row.names = FALSE: the default would prepend an unnamed row-name column,
	# giving data rows one more field than the header and shifting every column
	# one position to the right.
	write.table(data, file = "icgcData.txt", sep = "\t", row.names = FALSE)

	# Description of icgcData.txt: one meta entry per attribute column (type,
	# 0-based column index, name) and the column range of the binary set columns.
	# Entries are built with list() rather than c(): c() coerces mixed values to
	# character, which would emit index/start/end as strings ("0") not numbers.
	# NOTE(review): toJSON is not defined or attached in this script; it is
	# presumably provided by a JSON package loaded elsewhere (e.g. rjson) - confirm.
	# NOTE(review): `file` holds the directory and `name` the file name; confirm
	# this is what the consumer of the JSON expects.
	jsonData <- list(
		file = "/Users/jakeconway/Desktop",
		name = "icgcData.txt",
		header = 0,
		separator = "\t",
		skip = 0,
		meta = list(
			list(type = "id", index = 0, name = "Mutation.ID"),
			list(type = "character", index = 1, name = "Type"),
			list(type = "character", index = 2, name = "Consequences"),
			list(type = "character", index = 3, name = "Donors.Affected"),
			list(type = "character", index = 4, name = "Projects.Mutation.Observed"),
			list(type = "character", index = 8, name = "chromosome"),
			list(type = "character", index = 9, name = "specific.substitution"),
			list(type = "character", index = 10, name = "specific.mutation.type"),
			list(type = "character", index = 11, name = "gene"),
			list(type = "integer", index = 12, name = "total.donors")
		),
		# Columns 5-7 are skipped by the meta indexes above and declared here as
		# the binary set columns.
		sets = list(format = "binary", start = 5, end = 7)
	)

	# Serialize the description and write it to the current working directory.
	jsonData <- toJSON(jsonData)
	write(jsonData, "icgcData.json")