Created
May 22, 2015 20:47
-
-
Save DarwinAwardWinner/6c59b0947d24ae06e95b to your computer and use it in GitHub Desktop.
Using edgeR glmFit on DESeq CountDataSet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## The following two functions allow the use to edgeR's infrastructure | |
| ## to execute the DESeq method. Instead of glmFit(dge, design), use | |
| ## glmFit.CountDataSet(cds, design), then continue as with a normal | |
| ## edgeR analysis. | |
| getOffset.CountDataSet <- function(y) { | |
| if (any(is.na(sizeFactors(y)))) | |
| stop("Call estimateSizeFactors first") | |
| log(sizeFactors(y)) - mean(log(sizeFactors(y))) + mean(log(colSums(counts(y)))) | |
| } | |
| glmFit.CountDataSet <- function (y, design = NULL, dispersion = NULL, offset = NULL, | |
| weights = NULL, lib.size = NULL, start = NULL, method = "auto", | |
| ...) | |
| { | |
| stopifnot(is(y, "CountDataSet")) | |
| if (is.null(dispersion)) { | |
| if ("disp_pooled" %in% colnames(fData(y))) | |
| dispersion <- fData(y)$disp_pooled | |
| else if ("disp_blind" %in% colnames(fData(y))) { | |
| if (fitInfo(y, "blind")$sharingMode != "fit-only") | |
| warning("You have used 'method=\"blind\"' in estimateDispersion without also setting 'sharingMode=\"fit-only\"'. This will not yield useful results.") | |
| dispersion <- fData(y)$disp_blind | |
| } | |
| else stop("Call 'estimateDispersions' with 'method=\"pooled\"' (or 'blind') first.") | |
| } | |
| if (is.null(offset) && is.null(lib.size)) | |
| offset <- getOffset.CountDataSet(y) | |
| ## UGLY HACK: DESeq can produce infinite dispersion estimates, which | |
| ## cause errors in glmFit. To fix this, we replace infinite | |
| ## dispersion estimates with the maximum representable floating | |
| ## point value, which should always result in a PValue of 1. | |
| infdisp <- !is.finite(dispersion) | |
| dispersion[infdisp] <- .Machine$double.xmax | |
| fit <- glmFit.default(y = counts(y), design = design, dispersion = dispersion, | |
| offset = offset, weights = weights, lib.size = lib.size, | |
| start = start, method = method, ...) | |
| ## Now set the dispersions back to infinite in the resulting fit object. | |
| fit$dispersion[infdisp] <- +Inf | |
| dimnames(fit$coefficients) <- list(rownames(counts(y)), colnames(design)) | |
| fit$counts <- counts(y) | |
| fit$samples <- pData(y)[-1] | |
| fit$genes <- fData(y)[setdiff(names(fData(y)), c("disp_blind", "disp_pooled"))] | |
| new("DGEGLM", fit) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment