Skip to content

Instantly share code, notes, and snippets.

@DarwinAwardWinner
Created May 22, 2015 20:47
Show Gist options
  • Save DarwinAwardWinner/6c59b0947d24ae06e95b to your computer and use it in GitHub Desktop.
Save DarwinAwardWinner/6c59b0947d24ae06e95b to your computer and use it in GitHub Desktop.
Using edgeR glmFit on DESeq CountDataSet
## The following two functions allow the use to edgeR's infrastructure
## to execute the DESeq method. Instead of glmFit(dge, design), use
## glmFit.CountDataSet(cds, design), then continue as with a normal
## edgeR analysis.
getOffset.CountDataSet <- function(y) {
if (any(is.na(sizeFactors(y))))
stop("Call estimateSizeFactors first")
log(sizeFactors(y)) - mean(log(sizeFactors(y))) + mean(log(colSums(counts(y))))
}
glmFit.CountDataSet <- function (y, design = NULL, dispersion = NULL, offset = NULL,
weights = NULL, lib.size = NULL, start = NULL, method = "auto",
...)
{
stopifnot(is(y, "CountDataSet"))
if (is.null(dispersion)) {
if ("disp_pooled" %in% colnames(fData(y)))
dispersion <- fData(y)$disp_pooled
else if ("disp_blind" %in% colnames(fData(y))) {
if (fitInfo(y, "blind")$sharingMode != "fit-only")
warning("You have used 'method=\"blind\"' in estimateDispersion without also setting 'sharingMode=\"fit-only\"'. This will not yield useful results.")
dispersion <- fData(y)$disp_blind
}
else stop("Call 'estimateDispersions' with 'method=\"pooled\"' (or 'blind') first.")
}
if (is.null(offset) && is.null(lib.size))
offset <- getOffset.CountDataSet(y)
## UGLY HACK: DESeq can produce infinite dispersion estimates, which
## cause errors in glmFit. To fix this, we replace infinite
## dispersion estimates with the maximum representable floating
## point value, which should always result in a PValue of 1.
infdisp <- !is.finite(dispersion)
dispersion[infdisp] <- .Machine$double.xmax
fit <- glmFit.default(y = counts(y), design = design, dispersion = dispersion,
offset = offset, weights = weights, lib.size = lib.size,
start = start, method = method, ...)
## Now set the dispersions back to infinite in the resulting fit object.
fit$dispersion[infdisp] <- +Inf
dimnames(fit$coefficients) <- list(rownames(counts(y)), colnames(design))
fit$counts <- counts(y)
fit$samples <- pData(y)[-1]
fit$genes <- fData(y)[setdiff(names(fData(y)), c("disp_blind", "disp_pooled"))]
new("DGEGLM", fit)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment