vsbuffalo · March 7, 2012 18:25
diff --git a/pasilla-test.R b/pasilla-test.R
 ## analysis.R -- looking at how similar the results of edgeR and DESeq are                                                                                   
 ## [email protected] sans poly-A tail                                                                                                                 


 library(pasilla)
 library(DESeq)
 library(edgeR)
 library(ggplot2)
 library(limma)

 data(pasillaGenes)

 ### Data                                                                                                                                                     
 d <- counts(pasillaGenes, normalized=FALSE)
 conds <- pData(pasillaGenes)$condition

 ### Standard single-factor DESeq analysis                                                                                                                    
 cds <- newCountDataSet(d, conds)
 cds <- estimateSizeFactors(cds)
 cds <- estimateDispersions(cds)
 res <- nbinomTest(cds, "treated", "untreated")

 ### Standard single-factor edgeR analysis                                                                                                                    
 dge <- DGEList(d, group=conds)
 dge <- calcNormFactors(dge)
 dge.common <- estimateCommonDisp(dge)
 dge.tag <- estimateTagwiseDisp(dge)
 tt.common <- topTags(exactTest(dge.common), n=1e10)
 tt.tag <- topTags(exactTest(dge.tag), n=1e10)


 ### Comparison                                                                                                                                               
 combined <- local({
  tmp <- merge(res, tt.tag$table, by.x='id', by.y='row.names')
  out <- merge(tmp, tt.common$table, by.x='id', by.y='row.names')
  # now we need to clean up column names                                                                                                                     
  stopifnot(colnames(tt.tag$table) == colnames(tt.common$table)) ## don't assume.                                                                            
  edger.cn <- colnames(tt.tag$table)
  cn <- c(paste(colnames(res), "deseq", sep='.'),
          paste(edger.cn, "edger.tag", sep="."),
          paste(edger.cn, "edger.common", sep="."))
  colnames(out) <- cn
  out
 })

 fdr <- 0.1
 fdr.compare <- with(combined, data.frame(edgeR.tag=adj.P.Val.edger.tag,
                                         edgeR.common=adj.P.Val.edger.common,
                                         DESeq=padj.deseq))

 plotmatrix(fdr.compare)

 vc <- vennCounts(fdr.compare <= fdr)
 vennDiagram(vc)

 # so 522 genes are agreed upon - but are their FDR estimates still                                                                                           
 # consistent?                                                                                                                                                

 sig.i <- apply(fdr.compare <= fdr, 1, all)
 plotmatrix(fdr.compare[sig.i, ])
	## analysis.R -- looking at how similar the results of edgeR and DESeq are
	## [email protected] sans poly-A tail


	library(pasilla)
	library(DESeq)
	library(edgeR)
	library(ggplot2)
	library(limma)

	data(pasillaGenes)

	### Data
	d <- counts(pasillaGenes, normalized=FALSE)
	conds <- pData(pasillaGenes)$condition

	### Standard single-factor DESeq analysis
	cds <- newCountDataSet(d, conds)
	cds <- estimateSizeFactors(cds)
	cds <- estimateDispersions(cds)
	res <- nbinomTest(cds, "treated", "untreated")

	### Standard single-factor edgeR analysis
	dge <- DGEList(d, group=conds)
	dge <- calcNormFactors(dge)
	dge.common <- estimateCommonDisp(dge)
	dge.tag <- estimateTagwiseDisp(dge)
	tt.common <- topTags(exactTest(dge.common), n=1e10)
	tt.tag <- topTags(exactTest(dge.tag), n=1e10)


	### Comparison
	combined <- local({
	tmp <- merge(res, tt.tag$table, by.x='id', by.y='row.names')
	out <- merge(tmp, tt.common$table, by.x='id', by.y='row.names')
	# now we need to clean up column names
	stopifnot(colnames(tt.tag$table) == colnames(tt.common$table)) ## don't assume.
	edger.cn <- colnames(tt.tag$table)
	cn <- c(paste(colnames(res), "deseq", sep='.'),
	paste(edger.cn, "edger.tag", sep="."),
	paste(edger.cn, "edger.common", sep="."))
	colnames(out) <- cn
	out
	})

	fdr <- 0.1
	fdr.compare <- with(combined, data.frame(edgeR.tag=adj.P.Val.edger.tag,
	edgeR.common=adj.P.Val.edger.common,
	DESeq=padj.deseq))

	plotmatrix(fdr.compare)

	vc <- vennCounts(fdr.compare <= fdr)
	vennDiagram(vc)

	# so 522 genes are agreed upon - but are their FDR estimates still
	# consistent?

	sig.i <- apply(fdr.compare <= fdr, 1, all)
	plotmatrix(fdr.compare[sig.i, ])