Skip to content

Instantly share code, notes, and snippets.

View stephenturner's full-sized avatar

Stephen Turner stephenturner

View GitHub Profile
# Clear every object from the workspace (dot-prefixed names included), then
# re-source the user's ~/.Rprofile.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, and `all.names` is written in full to avoid partial matching.
rm(list = ls(all.names = TRUE))
source("~/.Rprofile")
library(SNPath)
# data(simDat)
# str(y); y
# str(gene.info); gene.info
# str(snp.info); head(snp.info)
# str(sim.pathway); sim.pathway
#
# ?aligator
@stephenturner
stephenturner / cexmanhattan.r
Created July 20, 2011 01:34
cexmanhattan.r
manhattan <- function(dataframe, colors=c("gray10", "gray50"), ymax="max", xaxis.cex=1, limitchromosomes=1:23, suggestiveline=-log10(1e-5), genomewideline=-log10(5e-8), annotate=NULL, ...) {
d=dataframe
#throws error if you don't have columns named CHR, BP, and P in your data frame.
if (!("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d))) stop("Make sure your data frame contains columns CHR, BP, and P")
# limits chromosomes to plot. (23=x, 24=y, 25=par?, 26=mito?)
if (any(limitchromosomes)) d=d[d$CHR %in% limitchromosomes, ]
@stephenturner
stephenturner / Rprofile.R
Created August 12, 2011 03:02
Rprofile.R
# To source this file into an environment to avoid cluttering the global workspace, put this in Rprofile:
# my.env <- new.env(); sys.source("C:/PathTo/THIS_FILE.r", my.env); attach(my.env)
#-----------------------------------------------------------------------
# Load packages, set options and cwd, set up database connection
#-----------------------------------------------------------------------
## Load packages
# require(ggplot2) #plotting
# require(plyr) #data manipulation
@stephenturner
stephenturner / 2011-08-29-rf-adiposity.r
Created August 29, 2011 23:39
2011-08-29-rf-adiposity.r
# Reset the session: clear the workspace (dot-prefixed names included) and
# close the current graphics device if one is open.
# A bare dev.off() raises an error when only the null device exists, which
# would abort the script in a fresh session, so it is guarded by dev.list().
rm(list = ls(all.names = TRUE))
if (!is.null(dev.list())) dev.off()
library(randomForest)
library(caret)
###############################################################################
############################## load functions #################################
###############################################################################
# Run randomForest() with fixed settings: keep the importance measures and
# the fitted forest, and drop incomplete rows via na.omit.
#
# Args:
#   ...: passed through unchanged to randomForest() (formula/data or x/y and
#        any further tuning arguments).
# Returns:
#   Whatever randomForest() returns for those arguments.
#
# TRUE is spelled out instead of `T`: `T` is a plain variable, so a stray
# `T <- 0` elsewhere in the session would silently switch these options off.
RF <- function(...) {
  randomForest(..., importance = TRUE, keep.forest = TRUE, na.action = na.omit)
}
@stephenturner
stephenturner / transpose_gwas_data.r
Created September 6, 2011 20:42
transpose_gwas_data.r
#install.packages("RODBC") #Only do this once if you don't have RODBC installed
# Attach RODBC with library() so that a missing package stops the script
# here; require() would only return FALSE with a warning and the failure
# would surface later at the first RODBC call.
library(RODBC)
# Open an ODBC channel using the data source name "localhost".
connection <- odbcConnect("localhost")
# Convenience wrapper around sqlQuery(): forwards every argument to it,
# always using the script-level `connection` as the channel.
query <- function(...) {
  sqlQuery(connection, ...)
}
# The castletter function will cast a character string as a numeric vector between 0 and 2, or NA
castletter <- function(x) {
x<-sub("0",NA,x)
x<-sub("A",0.0,x)
x<-sub("B",0.1,x)
@stephenturner
stephenturner / geo2r_gse7442.r
Created November 15, 2011 18:23
geo2r_gse7442.r
# R scripts generated Tue Nov 15 13:23:06 EST 2011
################################################################
# Differential expression analysis with limma
library(Biobase)
library(GEOquery)
library(limma)
# Download the series and platform data for GSE7442 from GEO
gset <- getGEO("GSE7442", GSEMatrix = TRUE)
@stephenturner
stephenturner / demo_geo2r.r
Created November 17, 2011 19:46
demo_geo2r.r
gset <- getGEO("GSE7442", GSEMatrix = TRUE)

# If getGEO() returned more than one element, pick the one whose name matches
# "GPL5058"; otherwise take the first (only) element.
idx <- if (length(gset) > 1) grep("GPL5058", attr(gset, "names")) else 1
gset <- gset[[idx]]

# Group label for every sample, in sample order
sml <- c("X","X","X","X","X","G0","X","X","X","X","G0","X","G0","G1","G1","G0","X","X","X","X","X","X","X","X","X","X","X","X","X","X","X","X","X","G1","X","G0","X","X","X","G1","X","X","G1")

# Drop the samples labelled "X"; `sel` keeps the positions that were retained
sel <- which(sml != "X")
sml <- sml[sel]
@stephenturner
stephenturner / noanno.txt
Created January 17, 2012 18:09
noanno.txt
ID logFC AveExpr t P.Value adj.P.Val B
7902702 -6.8 7.8 -46 1.0e-09 3.3e-05 11.0
8117594 -4.3 10.0 -33 9.6e-09 1.5e-04 10.0
8168794 -3.6 6.5 -30 1.7e-08 1.9e-04 9.7
8103736 -3.4 7.7 -28 3.1e-08 2.5e-04 9.3
7897426 -4.0 7.2 -26 4.7e-08 3.1e-04 9.0
8094278 -3.7 7.0 -24 7.4e-08 3.7e-04 8.7
@stephenturner
stephenturner / afteranno.txt
Created January 17, 2012 18:10
afteranno.txt
ID Symbol Name Ensembl logFC AveExpr t P.Value adj.P.Val B
7902702 CLCA2 chloride channel accessory 2 <a href='http://useast.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000137975'>ENSG00000137975</a> -6.832901 7.793344 -46.35621 1.015175e-09 3.281148e-05 11.177075
8117594 HIST1H2BM histone cluster 1, H2bm <a href='http://useast.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000196374'>ENSG00000196374</a> -4.255756 10.096873 -33.21938 9.579122e-09 1.548034e-04 10.063255
8168794 CENPI centromere protein I <a href='http://useast.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000102384'>ENSG00000102384</a> -3.612035 6.486830 -30.41465 1.733246e-08 1.867342e-04 9.698515
8103736 SCRG1 stimulator of chondrogenesis 1 <a href='http://useast.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000164
@stephenturner
stephenturner / annotatelimma.r
Created January 17, 2012 18:15
annotatelimma.r
# Install the necessary Bioconductor packages. Use the appropriate
# annotation.db package for your organism/platform. See
# https://www.bioconductor.org/packages/release/data/annotation/
#
# The biocLite.R installer has been retired by Bioconductor in favour of the
# BiocManager package, and sourcing an installer over plain http runs
# unauthenticated remote code, so installation goes through BiocManager.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(c(
  "affy",
  "limma",
  "annotate",
  "hugene10sttranscriptcluster.db"
))
# R2HTML is a CRAN package, so it is installed with install.packages().
install.packages("R2HTML")
# Load required packages