Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Created July 11, 2011 22:27
Show Gist options
  • Save stephenturner/1076949 to your computer and use it in GitHub Desktop.
Save stephenturner/1076949 to your computer and use it in GitHub Desktop.
aligator.r
rm(list=ls(all=T)); source("~/.Rprofile")
library(SNPath)
# data(simDat)
# str(y); y
# str(gene.info); gene.info
# str(snp.info); head(snp.info)
# str(sim.pathway); sim.pathway
#
# ?aligator
# pval <- calc.fun(cl=NULL, snp.dat=snpDat, y=y, snp.method="logiReg")$pval
# path.pval <- aligator(cl=NULL, snp.info=snp.info, gene.info=gene.info, gene.set=sim.pathway, snp.pval=pval, gene.def="rel", dist=20000, snp.pcut=0.05)
# path.pval
my.gene.info <- query("SELECT id AS 'gene.Name', chrom AS chr, start AS Start, end AS End FROM annotation.bed_entrez ORDER BY chr, start, end;")
head(my.gene.info)
my.snp.info <- query("SELECT a.snp AS 'snp.Name', chrom AS chr, position AS pos FROM mec.results_ac_sleep a JOIN mec.snp_stats b ON a.snp=b.snp ORDER BY chrom, position;")
head(my.snp.info)
my.snp.pval <- query("SELECT p FROM mec.results_ac_sleep a JOIN mec.snp_stats b ON a.snp=b.snp ORDER BY chrom, position;")[[1]]
head(my.snp.pval)[[1]]
# Getting pathway info in the correct format
# See http://stackoverflow.com/questions/6602881/text-file-to-list-in-r
#my.pathway <- scan("C:/Users/sturner/Desktop/test.txt", what="", sep="\n")
deparse <- function(x) {y<-NULL; for (i in 1:nrow(x)) y[i] <- paste(x[i,1],x[i,2]); y}
my.pathway <- query("SELECT pathid, entrezids FROM annotation.msigdb_canonical;")
head(my.pathway)
my.pathway <- deparse(my.pathway)
head(my.pathway)
my.pathway <- strsplit(my.pathway, "[[:space:]]+") # Separate by whitespace
names(my.pathway) <- sapply(my.pathway, function(x) x[[1]]) # Set element name to 1st element in vector
my.pathway <- lapply(my.pathway, function(x) x[-1]) # Remove 1st vector element from each list element
head(my.pathway)
length(my.snp.pval); nrow(my.snp.info) # must be equal!
nrow(my.gene.info) # number of annotated genes
length(my.pathway) # number of pathways
#cl <- makeCluster(c("localhost","localhost"), type = "SOCK")
system.time(
pathpvals <- aligator(cl=NULL, # cl=NULL if you don't want to parallelize
snp.info=my.snp.info,
gene.info=my.gene.info,
gene.set=my.pathway,
snp.pval=my.snp.pval,
gene.def="abs",
dist=20000,
Bresample=5000,
snp.pcut=0.001)
)
#stopCluster(cl)
save.image("tmp_aligator_msigdb.Rdata")
tmp <- pathpvals
pathpvals <- as.data.frame(pathpvals)
pathpvals$pathid <- row.names(pathpvals)
row.names(pathpvals) <- NULL
pathpvals$p <- pathpvals$pathpvals
pathpvals$pathpvals <- NULL
subset(pathpvals[order(pathpvals$p),], p<0.1)
# Save these results, and do the analysis on the permuted data.
pathpvals_real <- pathpvals
tmp <- my.snp.pval
my.snp.pval <- query("SELECT p FROM mec.results_ac_sleep_permuted42 a JOIN mec.snp_stats b ON a.snp=b.snp ORDER BY chrom, position;")[[1]]
# now run the above code, then rename
pathpvals_perm <- pathpvals
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment