stephenturner · February 29, 2012 23:21
diff --git a/pathway.r b/pathway.r
 # These are bioconductor packages. See http://www.bioconductor.org/install/ for installation instructions
 library(Biobase)
 library(GEOquery)
 library(limma)
 library(SPIA)
 library(hgu133plus2.db)

 # load series and platform data from GEO: 
 # http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=GSE4107
 eset <- getGEO("GSE4107", GSEMatrix =TRUE)[[1]] 

 # log transform 
 exprs(eset) <- log2(exprs(eset))

 # set up a design matrix and contrast matrix
 design <- model.matrix(~0+as.factor(c(rep(1,12), rep(0,10))))
 colnames(design) <- c("cancer","normal")
 contrast.matrix <- makeContrasts(cancer_v_normal=cancer-normal, levels=design)

 # run the analysis with empirical Bayes moderated standard errors
 fit <- lmFit(eset,design)
 fit2 <- contrasts.fit(fit, contrast.matrix)
 fit2 <- eBayes(fit2)


 # get useful information for the top 25 genes
 top <- topTable(fit2, coef="cancer_v_normal", number=nrow(fit2), adjust.method="fdr")
 top <- na.omit(subset(top, select=c(ID, logFC, adj.P.Val)))
 top$ID <- as.character(top$ID)

 # annotate with entrez info
 top$ENTREZ<-unlist(as.list(hgu133plus2ENTREZID[top$ID]))
 top<-top[!is.na(top$ENTREZ),]
 top<-top[!duplicated(top$ENTREZ),]
 top$SYMBOL<-unlist(as.list(hgu133plus2SYMBOL[top$ID]))
 top<-top[!is.na(top$SYMBOL),]
 top<-top[!duplicated(top$SYMBOL),]

 top[1:20,]
	# These are bioconductor packages. See http://www.bioconductor.org/install/ for installation instructions
	library(Biobase)
	library(GEOquery)
	library(limma)
	library(SPIA)
	library(hgu133plus2.db)

	# load series and platform data from GEO:
	# http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=GSE4107
	eset <- getGEO("GSE4107", GSEMatrix =TRUE)[[1]]

	# log transform
	exprs(eset) <- log2(exprs(eset))

	# set up a design matrix and contrast matrix
	design <- model.matrix(~0+as.factor(c(rep(1,12), rep(0,10))))
	colnames(design) <- c("cancer","normal")
	contrast.matrix <- makeContrasts(cancer_v_normal=cancer-normal, levels=design)

	# run the analysis with empirical Bayes moderated standard errors
	fit <- lmFit(eset,design)
	fit2 <- contrasts.fit(fit, contrast.matrix)
	fit2 <- eBayes(fit2)


	# get useful information for the top 25 genes
	top <- topTable(fit2, coef="cancer_v_normal", number=nrow(fit2), adjust.method="fdr")
	top <- na.omit(subset(top, select=c(ID, logFC, adj.P.Val)))
	top$ID <- as.character(top$ID)

	# annotate with entrez info
	top$ENTREZ<-unlist(as.list(hgu133plus2ENTREZID[top$ID]))
	top<-top[!is.na(top$ENTREZ),]
	top<-top[!duplicated(top$ENTREZ),]
	top$SYMBOL<-unlist(as.list(hgu133plus2SYMBOL[top$ID]))
	top<-top[!is.na(top$SYMBOL),]
	top<-top[!duplicated(top$SYMBOL),]

	top[1:20,]