Skip to content

Instantly share code, notes, and snippets.

View JakeConway's full-sized avatar

Jake Conway JakeConway

  • Methuen, Massachusetts
View GitHub Profile
@JakeConway
JakeConway / fromExpression.R
Last active December 27, 2016 06:14
expression to UpSetR converter
fromExpression <- function(vd){
vd <- list(vd)
intersections <- lapply(vd, function(x) strsplit(names(unlist(x)), "&"))
intersections <- lapply(intersections[[1]], function(x) unlist(as.list(x)))
sets <- unique(unlist(intersections))
data <- na.omit(data.frame(matrix(NA, ncol = length(sets))))
names(data) <- sets
counts <- lapply(vd, function(x) unlist(x))
names(counts[[1]]) <- NULL
counts[[1]] <- as.numeric(counts[[1]])
@JakeConway
JakeConway / fromList.R
Last active December 27, 2016 06:14
Convert list of named vectors to UpSetR input
#' Convert a list of named vectors to UpSetR input
#'
#' Builds a binary membership table with one row per unique element found
#' across all vectors in `input` and one column per vector. A cell is 1L when
#' the element occurs in that vector and 0L otherwise.
#'
#' @param input A named list of vectors; each vector holds the members of one
#'   set.
#' @return A data frame of 0/1 integers with `length(input)` columns, named
#'   after `names(input)`. A data frame is returned even when `input` contains
#'   a single set.
fromList <- function(input) {
  elements <- unique(unlist(input))
  # Membership flags laid out set by set, i.e. column-major for the matrix
  # built below. as.integer() keeps the type stable when nothing is produced.
  flags <- as.integer(unlist(lapply(input, function(set) {
    elements %in% set
  })))
  data <- data.frame(matrix(flags, ncol = length(input), byrow = FALSE))
  # drop = FALSE keeps the data frame shape for a single-set input; without it
  # the one-column result collapses to a plain vector and the names() call
  # below mislabels its elements instead of naming the column.
  data <- data[which(rowSums(data) != 0), , drop = FALSE]
  names(data) <- names(input)
  data
}
# Load every file in the "Set 1" sample directory into a single data frame.
# NOTE(review): the path is hard-coded to one user's desktop, and setwd()
# changes the working directory for the rest of the session.
setwd("/Users/jakeconway/Desktop/icgcSampleData/Set 1")
# Every entry in the directory is treated as an input file.
fileList <- list.files()
# Empty accumulators; only set1 is filled by the loop below.
set1 <- data.frame(); set2 <- data.frame(); set3 <- data.frame();
for(file in fileList){
# Each file is read as a tab-separated table with a header row.
temp_data <- read.table(file, header=TRUE, sep="\t")
# Store mutation IDs as character strings.
temp_data$Mutation.ID <- as.character(temp_data$Mutation.ID)
# Append this file's rows to the running table.
set1 <- rbind(set1, temp_data)
rm(temp_data)
}
# Move on to the second sample directory.
setwd("/Users/jakeconway/Desktop/icgcSampleData/Set 2")
@JakeConway
JakeConway / ICGC_Rest_API_to_UpSetR.R
Last active December 27, 2016 06:11
Pull mutation data from projects via the ICGC Rest API
require(jsonlite)
require(curl)
#An example of the function input.
#Specify the project name, fields, and number of entries(size) to pull in each list
data <- list(list(project = "THCA-US", fields = c("id", "mutation", "chromosome", "start", "end"), size = 6659),
list(project = "THCA-SA", fields = c("id", "mutation", "chromosome", "start", "end"), size = 45126),
list(project = "LUSC-US", fields = c("id", "mutation", "chromosome", "start", "end"), size = 65063),
list(project = "LUSC-KR", fields = c("id", "mutation", "chromosome", "start", "end"), size = 64671),
myplot <- function(data, colour){
data <- data[which(data$color == colour), ]
plot_title <- as.character(unique(data$project))
data <- count(data["mutation"])
data$freq <- as.numeric(data$freq)
data$mutation <- as.character(data$mutation)
data <- data[which(nchar(data$mutation) == 3), ]
data <- data[order(data$mutation), ]
bases <- strsplit(data$mutation, ">")
original <- unlist(lapply(bases, function(x){x <- x[1]}))
@JakeConway
JakeConway / BananaGenome.R
Last active December 27, 2016 06:12
Manual intersection input of banana genome example
input <- c(
Musa_acuminata = 759,
Phoenix_dactylifera = 769,
Arabidopsis_thaliana = 1187,
Oryza_sativa = 1246,
Sorghum_bicolor = 827,
Brachypodium_distachyon = 387,
"Phoenix_dactylifera&Musa_acuminata" = 467,
"Oryza_sativa&Musa_acuminata" = 29,
"Arabidopsis_thaliana&Oryza_sativa" = 6,
@JakeConway
JakeConway / Date_to_Day_TEV.R
Last active December 27, 2016 06:12
Script to remove dates from files and convert them to days since initial visit
date_to_day <- function(file_name, first_date){
data <- read.table(file_name, header=TRUE, sep='\t', quote = NULL, row.names = NULL, check.names = FALSE)
data$Date <- as.character(data$Date)
data <- data[order(as.Date(data$Date, "%m/%d/%y")), ]
data$Date <- as.Date(data$Date, "%m/%d/%y")
if(is.null(first_date) == TRUE){
first_date <- data[1, ]$Date
}
else{
first_date <- as.Date(first_date, "%m/%d/%y")
@JakeConway
JakeConway / block_table_list.py
Created December 23, 2016 08:25
A script that finds all tables in the UCSC DB compatible with TBRdenWeb block annotations
import MySQLdb, itertools, pprint
# Establish a connection to the UCSC genome MySQL server as user 'genome'
# with an empty password, selecting the hg19 database.
connection = MySQLdb.connect(host = 'genome-mysql.cse.ucsc.edu', user = 'genome',
passwd = '', db = 'hg19')
# Create a cursor so we can issue queries against the database.
cursor = connection.cursor()
#grab all of the tables in the DB
@JakeConway
JakeConway / prostate_cancer_df.R
Last active January 10, 2017 22:55
A script to generate the data frame for the data in Figure 1A of the ClinPlots README
#create myocardial infarction data frame based on Figure 1A in repo
#repo link: https://github.com/JakeConway/ClinPlots
MI_df <- data.frame(
gene=c(NA, 'LPA', 'THBS2', 'LDLR', 'LIPC', 'ESR2', 'ESR2', 'FXN'),
SNP_loc=c(NA, 'rs3798220', 'rs8089', 'rs14158', 'rs11630220', 'rs1271572', 'rs35410698', 'rs3793456'),
genotype=c(NA, 'CT', 'AC', 'GG', 'AG', 'CC', 'GG', 'AA'),
LR=c(NA, 1.86, 1.09, 2.88, 1.15, 0.73, 1.03, 0.94),
studies=c(NA, 2, 1, 1, 1, 1, 1, 1),
samples=c(NA, 17031, 4868, 3542, 3542, 3089, 1094, 1094),
pt_probability=c(2.0, 3.7, 4.0, 10.6, 12.0, 9.1, 9.4, 8.9)
binData <- function(nBins, data, cohort, binSize) {
counts <- c()
for(i in seq(nBins)) {
start <- (i-1)*binSize
end <- i*binSize
count <- c(which(data$start > start & data$start < end))
count <- c(count, which(data$end > start & data$end < end))
count <- c(count, which(data$start < start & data$end > end))
count <- c(count, which(data$start > start & data$end < end))
count <- length(unique(count))