DarwinAwardWinner · May 22, 2015 20:47
diff --git a/glmFit_CountDataSet.R b/glmFit_CountDataSet.R
 ## The following two functions allow the use to edgeR's infrastructure
 ## to execute the DESeq method. Instead of glmFit(dge, design), use
 ## glmFit.CountDataSet(cds, design), then continue as with a normal
 ## edgeR analysis.
 getOffset.CountDataSet <- function(y) {
  if (any(is.na(sizeFactors(y))))
    stop("Call estimateSizeFactors first")
  log(sizeFactors(y)) - mean(log(sizeFactors(y))) + mean(log(colSums(counts(y))))
 }

 glmFit.CountDataSet <- function (y, design = NULL, dispersion = NULL, offset = NULL,
                                 weights = NULL, lib.size = NULL, start = NULL, method = "auto",
                                 ...)
 {
  stopifnot(is(y, "CountDataSet"))
  if (is.null(dispersion)) {
    if ("disp_pooled" %in% colnames(fData(y)))
      dispersion <- fData(y)$disp_pooled
    else if ("disp_blind" %in% colnames(fData(y))) {
      if (fitInfo(y, "blind")$sharingMode != "fit-only")
        warning("You have used 'method=\"blind\"' in estimateDispersion without also setting 'sharingMode=\"fit-only\"'. This will not yield useful results.")
      dispersion <- fData(y)$disp_blind
    }
    else stop("Call 'estimateDispersions' with 'method=\"pooled\"' (or 'blind') first.")
  }
  if (is.null(offset) && is.null(lib.size))
    offset <- getOffset.CountDataSet(y)
  ## UGLY HACK: DESeq can produce infinite dispersion estimates, which
  ## cause errors in glmFit. To fix this, we replace infinite
  ## dispersion estimates with the maximum representable floating
  ## point value, which should always result in a PValue of 1.
  infdisp <- !is.finite(dispersion)
  dispersion[infdisp] <- .Machine$double.xmax
  fit <- glmFit.default(y = counts(y), design = design, dispersion = dispersion,
                        offset = offset, weights = weights, lib.size = lib.size,
                        start = start, method = method, ...)
  ## Now set the dispersions back to infinite in the resulting fit object.
  fit$dispersion[infdisp] <- +Inf
  dimnames(fit$coefficients) <- list(rownames(counts(y)), colnames(design))
  fit$counts <- counts(y)
  fit$samples <- pData(y)[-1]
  fit$genes <- fData(y)[setdiff(names(fData(y)), c("disp_blind", "disp_pooled"))]
  new("DGEGLM", fit)
 }
	## The following two functions allow the use to edgeR's infrastructure
	## to execute the DESeq method. Instead of glmFit(dge, design), use
	## glmFit.CountDataSet(cds, design), then continue as with a normal
	## edgeR analysis.
	getOffset.CountDataSet <- function(y) {
	if (any(is.na(sizeFactors(y))))
	stop("Call estimateSizeFactors first")
	log(sizeFactors(y)) - mean(log(sizeFactors(y))) + mean(log(colSums(counts(y))))
	}

	glmFit.CountDataSet <- function (y, design = NULL, dispersion = NULL, offset = NULL,
	weights = NULL, lib.size = NULL, start = NULL, method = "auto",
	...)
	{
	stopifnot(is(y, "CountDataSet"))
	if (is.null(dispersion)) {
	if ("disp_pooled" %in% colnames(fData(y)))
	dispersion <- fData(y)$disp_pooled
	else if ("disp_blind" %in% colnames(fData(y))) {
	if (fitInfo(y, "blind")$sharingMode != "fit-only")
	warning("You have used 'method=\"blind\"' in estimateDispersion without also setting 'sharingMode=\"fit-only\"'. This will not yield useful results.")
	dispersion <- fData(y)$disp_blind
	}
	else stop("Call 'estimateDispersions' with 'method=\"pooled\"' (or 'blind') first.")
	}
	if (is.null(offset) && is.null(lib.size))
	offset <- getOffset.CountDataSet(y)
	## UGLY HACK: DESeq can produce infinite dispersion estimates, which
	## cause errors in glmFit. To fix this, we replace infinite
	## dispersion estimates with the maximum representable floating
	## point value, which should always result in a PValue of 1.
	infdisp <- !is.finite(dispersion)
	dispersion[infdisp] <- .Machine$double.xmax
	fit <- glmFit.default(y = counts(y), design = design, dispersion = dispersion,
	offset = offset, weights = weights, lib.size = lib.size,
	start = start, method = method, ...)
	## Now set the dispersions back to infinite in the resulting fit object.
	fit$dispersion[infdisp] <- +Inf
	dimnames(fit$coefficients) <- list(rownames(counts(y)), colnames(design))
	fit$counts <- counts(y)
	fit$samples <- pData(y)[-1]
	fit$genes <- fData(y)[setdiff(names(fData(y)), c("disp_blind", "disp_pooled"))]
	new("DGEGLM", fit)
	}