Skip to content

Instantly share code, notes, and snippets.

View JakeConway's full-sized avatar

Jake Conway JakeConway

  • Methuen, Massachusetts
View GitHub Profile
@JakeConway
JakeConway / infer_CCR.R
Created March 24, 2022 21:55
CCF inference
# Drop MAF rows whose tumor purity is missing or zero.
#
# Args:
#   maf: data frame with a numeric `purity` column and a
#     `Tumor_Sample_Barcode` column.
#
# Returns:
#   `maf` restricted to rows with purity > 0. Missing purities are treated
#   as 0 before filtering.
#
# Side effects:
#   Prints how many rows (and distinct samples) had a purity of 0 or NA.
#   Rows with a negative purity are also dropped by the `> 0` filter but are
#   not included in the reported counts.
purity.check <- function(maf) {
  # Assign through the column with a logical mask: this is a no-op when no
  # purity is NA. The previous form, maf[which(...), ]$purity <- 0, applied
  # `$<-` to a zero-row subset in that case and failed with
  # "replacement has 1 row, data has 0".
  maf$purity[is.na(maf$purity)] <- 0

  # After the replacement above `purity` holds no NA, so this mask is NA-free.
  unusable <- maf$purity == 0
  samples <- length(unique(maf$Tumor_Sample_Barcode[unusable]))
  rows <- sum(unusable)

  maf <- maf[which(maf$purity > 0), ]
  cat("Removed", rows, "rows from", samples, "samples with purity values of 0 or NA\n")
  return(maf)
}
tcn.check <- function(maf) {
library(data.table)
# Switch the working directory to the folder that holds the input file.
# NOTE(review): setwd() with an absolute, user-specific path changes global
# state and only works on the author's machine; passing a full path to fread()
# would avoid both problems.
setwd("/Users/jakeconway/Downloads")
# Name of the segment (.seg) file to load, relative to the working directory.
file.name <- "PR_VanAllen_Haq_Melanoma_Capture_All_Pairs.aggregated_case_sample.seg"
# Read the file with data.table's fread(), then sort rows by Chromosome and,
# within each chromosome, by Start.
# NOTE(review): if Chromosome is read as character, this ordering is
# lexicographic (e.g. "10" sorts before "2") - confirm that is acceptable.
data <- fread(file.name)
data <- data[order(data$Chromosome, data$Start), ]
pairwiseCorrelationOfGenes <- function(maf, sig.genes) {
genes <- sig.genes[which(sig.genes$p < 0.05 & sig.genes$q < 0.3), ]$gene
df <- as.data.frame(t(combn(genes[1:3], 2)))
names(df) <- c("gene1", "gene2")
apply(df, 1, function(x) {
gene1 <- unname(x["gene1"])
gene2 <- unname(x["gene2"])
samples.w.gene1 <- unique(maf[which(maf$Hugo_Symbol == gene1), ]$Tumor_Sample_Barcode)
samples.w.gene2 <- unique(maf[which(maf$Hugo_Symbol == gene2), ]$Tumor_Sample_Barcode)
samples.w.both.genes <- intersect(samples.w.gene1, samples.w.gene2)
var styleJSON = JSON.stringify(success.data).split(",");
for(var i = 0; i < styleJSON.length; i++) {
styleJSON[i] = styleJSON[i].split(":");
var innerLength = styleJSON[i].length;
for(var j = 0; j < innerLength; j++) {
if(styleJSON[i][j].indexOf("[{") > -1) {
styleJSON[i][j] = styleJSON[i][j].split("{");
styleJSON[i][j][0] = "<span style='color: #660000;'>" + "[" + "</span>" + "<br>" +
"<span style='color: #660000;'>" + "{" + "</span>" + "<br>";
styleJSON[i][j][1] = "<span style='color: #006699;'>" + styleJSON[i][j][1] + "</span>" + ": ";
binData <- function(nBins, data, cohort, binSize) {
counts <- c()
for(i in seq(nBins)) {
start <- (i-1)*binSize
end <- i*binSize
count <- c(which(data$start > start & data$start < end))
count <- c(count, which(data$end > start & data$end < end))
count <- c(count, which(data$start < start & data$end > end))
count <- c(count, which(data$start > start & data$end < end))
count <- length(unique(count))
@JakeConway
JakeConway / prostate_cancer_df.R
Last active January 10, 2017 22:55
A script to generate a data frame for the data in Figure 1A of the ClinPlots README
#create myocardial infarction data frame based on Figure 1A in repo
#repo link: https://github.com/JakeConway/ClinPlots
MI_df <- data.frame(
gene=c(NA, 'LPA', 'THBS2', 'LDLR', 'LIPC', 'ESR2', 'ESR2', 'FXN'),
SNP_loc=c(NA, 'rs3798220', 'rs8089', 'rs14158', 'rs11630220', 'rs1271572', 'rs35410698', 'rs3793456'),
genotype=c(NA, 'CT', 'AC', 'GG', 'AG', 'CC', 'GG', 'AA'),
LR=c(NA, 1.86, 1.09, 2.88, 1.15, 0.73, 1.03, 0.94),
studies=c(NA, 2, 1, 1, 1, 1, 1, 1),
samples=c(NA, 17031, 4868, 3542, 3542, 3089, 1094, 1094),
pt_probability=c(2.0, 3.7, 4.0, 10.6, 12.0, 9.1, 9.4, 8.9)
@JakeConway
JakeConway / block_table_list.py
Created December 23, 2016 08:25
A script that finds all tables in the UCSC DB compatible with TBRdenWeb block annotations
import MySQLdb, itertools, pprint
#establish connection to UCSC DB
connection = MySQLdb.connect(host = 'genome-mysql.cse.ucsc.edu', user = 'genome',
passwd = '', db = 'hg19')
#generate cursor so we can make queries to the DB
cursor = connection.cursor()
#grab all of the tables in the DB
@JakeConway
JakeConway / Date_to_Day_TEV.R
Last active December 27, 2016 06:12
Script to remove dates from files and convert them to days since initial visit
date_to_day <- function(file_name, first_date){
data <- read.table(file_name, header=TRUE, sep='\t', quote = NULL, row.names = NULL, check.names = FALSE)
data$Date <- as.character(data$Date)
data <- data[order(as.Date(data$Date, "%m/%d/%y")), ]
data$Date <- as.Date(data$Date, "%m/%d/%y")
if(is.null(first_date) == TRUE){
first_date <- data[1, ]$Date
}
else{
first_date <- as.Date(first_date, "%m/%d/%y")
@JakeConway
JakeConway / BananaGenome.R
Last active December 27, 2016 06:12
Manual intersection input of banana genome example
input <- c(
Musa_acuminata = 759,
Phoenix_dactylifera = 769,
Arabidopsis_thaliana = 1187,
Oryza_sativa = 1246,
Sorghum_bicolor = 827,
Brachypodium_distachyon = 387,
"Phoenix_dactylifera&Musa_acuminata" = 467,
"Oryza_sativa&Musa_acuminata" = 29,
"Arabidopsis_thaliana&Oryza_sativa" = 6,
myplot <- function(data, colour){
data <- data[which(data$color == colour), ]
plot_title <- as.character(unique(data$project))
data <- count(data["mutation"])
data$freq <- as.numeric(data$freq)
data$mutation <- as.character(data$mutation)
data <- data[which(nchar(data$mutation) == 3), ]
data <- data[order(data$mutation), ]
bases <- strsplit(data$mutation, ">")
original <- unlist(lapply(bases, function(x){x <- x[1]}))