JakeConway’s gists

JakeConway / fromExpression.R

Last active December 27, 2016 06:14

Expression-to-UpSetR input converter

	fromExpression <- function(vd){
	vd <- list(vd)
	intersections <- lapply(vd, function(x) strsplit(names(unlist(x)), "&"))
	intersections <- lapply(intersections[[1]], function(x) unlist(as.list(x)))
	sets <- unique(unlist(intersections))
	data <- na.omit(data.frame(matrix(NA, ncol = length(sets))))
	names(data) <- sets
	counts <- lapply(vd, function(x) unlist(x))
	names(counts[[1]]) <- NULL
	counts[[1]] <- as.numeric(counts[[1]])

JakeConway / fromList.R

Last active December 27, 2016 06:14

Convert list of named vectors to UpSetR input

	#' Convert a list of named vectors to UpSetR input
	#'
	#' @param input A named list of vectors; each list element holds the members
	#'   of one set.
	#' @return A data frame of 0/1 integers with one row per distinct element
	#'   found across all sets (in order of first appearance) and one column per
	#'   set, named after `names(input)`. A 1 means the element is in that set.
	fromList <- function(input){
	  # Every distinct element across all sets becomes one row of the result.
	  elements <- unique(unlist(input))
	  # One 0/1 membership indicator per (element, set), laid out set by set so
	  # the matrix below can be filled column-wise.
	  membership <- unlist(
	    lapply(input, function(x) as.integer(elements %in% x)),
	    use.names = FALSE
	  )
	  data <- data.frame(matrix(membership, ncol = length(input), byrow = FALSE))
	  # drop = FALSE keeps a data frame even when there is only one set; without
	  # it a single-column result collapses to a bare vector.
	  data <- data[which(rowSums(data) != 0), , drop = FALSE]
	  names(data) <- names(input)
	  data
	}

JakeConway / icgcToUpSetR.R

Last active December 27, 2016 06:13

	# Load every file in the "Set 1" directory and stack their rows into set1.
	# list.files() is called with no arguments, so it lists the working
	# directory selected by the setwd() call just before it.
	setwd("/Users/jakeconway/Desktop/icgcSampleData/Set 1")
	fileList <- list.files()
	# Empty accumulators; only set1 is filled in the visible part of the script
	# (set2 and set3 are presumably filled further down -- not shown here).
	set1 <- data.frame(); set2 <- data.frame(); set3 <- data.frame();
	for(file in fileList){
	# Each file is read as a tab-separated table with a header row.
	temp_data <- read.table(file, header=TRUE, sep="\t")
	# Store Mutation.ID as character before appending
	# (presumably to avoid factor-level clashes in rbind -- TODO confirm).
	temp_data$Mutation.ID <- as.character(temp_data$Mutation.ID)
	set1 <- rbind(set1, temp_data)
	rm(temp_data)
	}
	# Switch to the "Set 2" directory for the next batch of files.
	setwd("/Users/jakeconway/Desktop/icgcSampleData/Set 2")

JakeConway / ICGC_Rest_API_to_UpSetR.R

Last active December 27, 2016 06:11

Pull mutation data from projects via the ICGC REST API

	require(jsonlite)
	require(curl)

	#An example of the function input.
	#Specify the project name, fields, and number of entries(size) to pull in each list

	data <- list(list(project = "THCA-US", fields = c("id", "mutation", "chromosome", "start", "end"), size = 6659),
	list(project = "THCA-SA", fields = c("id", "mutation", "chromosome", "start", "end"), size = 45126),
	list(project = "LUSC-US", fields = c("id", "mutation", "chromosome", "start", "end"), size = 65063),
	list(project = "LUSC-KR", fields = c("id", "mutation", "chromosome", "start", "end"), size = 64671),

JakeConway / Generate_UpSetR_plot_with_SNP_Heat_Map.R

Last active December 27, 2016 06:13

	myplot <- function(data, colour){
	data <- data[which(data$color == colour), ]
	plot_title <- as.character(unique(data$project))
	data <- count(data["mutation"])
	data$freq <- as.numeric(data$freq)
	data$mutation <- as.character(data$mutation)
	data <- data[which(nchar(data$mutation) == 3), ]
	data <- data[order(data$mutation), ]
	bases <- strsplit(data$mutation, ">")
	original <- unlist(lapply(bases, function(x){x <- x[1]}))

JakeConway / BananaGenome.R

Last active December 27, 2016 06:12

Manual intersection input of banana genome example

	input <- c(
	Musa_acuminata = 759,
	Phoenix_dactylifera = 769,
	Arabidopsis_thaliana = 1187,
	Oryza_sativa = 1246,
	Sorghum_bicolor = 827,
	Brachypodium_distachyon = 387,
	"Phoenix_dactylifera&Musa_acuminata" = 467,
	"Oryza_sativa&Musa_acuminata" = 29,
	"Arabidopsis_thaliana&Oryza_sativa" = 6,

JakeConway / Date_to_Day_TEV.R

Last active December 27, 2016 06:12

Script to remove dates from files and convert them to days since initial visit

	date_to_day <- function(file_name, first_date){
	data <- read.table(file_name, header=TRUE, sep='\t', quote = NULL, row.names = NULL, check.names = FALSE)
	data$Date <- as.character(data$Date)
	data <- data[order(as.Date(data$Date, "%m/%d/%y")), ]
	data$Date <- as.Date(data$Date, "%m/%d/%y")
	if(is.null(first_date) == TRUE){
	first_date <- data[1, ]$Date
	}
	else{
	first_date <- as.Date(first_date, "%m/%d/%y")

JakeConway / block_table_list.py

Created December 23, 2016 08:25

A script that finds all tables in the UCSC DB compatible with TBRdenWeb block annotations

	import MySQLdb, itertools, pprint

	# Establish a connection to the UCSC MySQL server: user 'genome',
	# empty password, database 'hg19'.
	connection = MySQLdb.connect(host = 'genome-mysql.cse.ucsc.edu', user = 'genome',
	passwd = '', db = 'hg19')

	# Create a cursor on that connection so we can run queries against the DB.
	cursor = connection.cursor()

	#grab all of the tables in the DB

JakeConway / prostate_cancer_df.R

Last active January 10, 2017 22:55

A script to generate the data frame for the data in Figure 1A of the ClinPlots README

	#create myocardial infarction data frame based on Figure 1A in repo
	#repo link: https://github.com/JakeConway/ClinPlots
	MI_df <- data.frame(
	gene=c(NA, 'LPA', 'THBS2', 'LDLR', 'LIPC', 'ESR2', 'ESR2', 'FXN'),
	SNP_loc=c(NA, 'rs3798220', 'rs8089', 'rs14158', 'rs11630220', 'rs1271572', 'rs35410698', 'rs3793456'),
	genotype=c(NA, 'CT', 'AC', 'GG', 'AG', 'CC', 'GG', 'AA'),
	LR=c(NA, 1.86, 1.09, 2.88, 1.15, 0.73, 1.03, 0.94),
	studies=c(NA, 2, 1, 1, 1, 1, 1, 1),
	samples=c(NA, 17031, 4868, 3542, 3542, 3089, 1094, 1094),
	pt_probability=c(2.0, 3.7, 4.0, 10.6, 12.0, 9.1, 9.4, 8.9)

JakeConway / ChrDistribution.R

Last active March 7, 2017 04:59

	binData <- function(nBins, data, cohort, binSize) {
	counts <- c()
	for(i in seq(nBins)) {
	start <- (i-1)*binSize
	end <- i*binSize
	count <- c(which(data$start > start & data$start < end))
	count <- c(count, which(data$end > start & data$end < end))
	count <- c(count, which(data$start < start & data$end > end))
	count <- c(count, which(data$start > start & data$end < end))
	count <- length(unique(count))

Jake Conway JakeConway