@DarwinAwardWinner
Created January 5, 2016 16:00
Example of running multiple variants of an analysis in R
#!/usr/bin/env Rscript
library(matrixStats)
library(plyr)
library(lubridate)
library(reshape2)
library(minfi)
library(limma)
library(sva)
library(celltypes450)
library(ggbiplot)
library(dendextend)
library(evaluate)
library(ggplot2)
library(stringr)
library(magrittr)
library(dplyr)
library(rms)
library(parallel)
options(cores=8, mc.cores=8)
## Evaluate expr, returning any error (with its captured call stack) as a
## condition object instead of stopping; try_capture_stack() is assumed to
## come from the evaluate package loaded above.
try_or_err <- function(expr) {
try_capture_stack(substitute(expr), parent.frame())
}
## Evaluate arg while sending any plotting output to a throwaway device.
suppressPlot <- function(arg) {
png("/dev/null")
on.exit(dev.off())
result <- arg
result
}
add.numbered.colnames <- function(x, prefix="C") {
x %>% set_colnames(sprintf("%s%i", prefix, seq(from=1, length.out=ncol(x))))
}
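## e.g. matrix(1:6, nrow=2) %>% add.numbered.colnames("PC") gets column names
## "PC1", "PC2", "PC3".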
tsmsg <- function(...) message(date(), ": ", ...)
human.readable.duration <- function(x, ...) {
if (length(x) == 0)
return(character(0))
x <- duration(x, ...)
use.secs <- is.na(x) | x@.Data < 120
res <- sapply(x@.Data, lubridate:::compute_estimate)
res %<>% str_replace("^~", "")
res[use.secs] <- sprintf("%0.3g seconds", x@.Data[use.secs])
approx <- res %>% str_split(" +", 2) %>%
sapply(. %>% { duration(as.numeric(.[1]), .[2])})
not.exact <- approx != x
res[str_detect(res, "^-?1 ")] %<>% str_replace("s$", "")
res[not.exact] %<>% str_c("~", .)
return(res)
}
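## e.g. human.readable.duration(45, "seconds") gives "45 seconds"; longer
## durations are reported in lubridate's estimated unit (roughly "1 hour" for
## 3600 seconds), prefixed with "~" when the conversion is not exact.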
timeTask <- function(name, expr) {
tsmsg("Starting task '", name, "'")
timing <- system.time(result <- withVisible(expr))
tsmsg(sprintf("Completed task '%s' in %s.", name,
human.readable.duration(timing["elapsed"], "seconds")))
print(timing)
if (result$visible) {
return(result$value)
} else {
return(invisible(result$value))
}
}
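## e.g. timeTask("toy example", { Sys.sleep(1); 42 }) logs the start time,
## prints the elapsed timing, and returns 42 (invisibly when the wrapped
## expression's value would itself be invisible).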
plotCellProportions <- function(cellprop, scale=FALSE) {
tidy.cellprop <- cellprop %>%
melt(id.vars="Sample",
varnames = c("Sample", "CellType"),
value.name = "Proportion")
ggplot(tidy.cellprop) +
aes(x=Sample, y=Proportion, fill=CellType) +
geom_bar(stat="identity", position=if (scale) "fill" else "stack") +
scale_fill_brewer(palette="Set1")
}
reg.logit2 <- function(x, eps=0) {
log2(x + eps) - log2(1 - x + eps)
}
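## e.g. reg.logit2(c(0, 0.5, 1)) gives -Inf, 0, Inf, whereas
## reg.logit2(c(0, 0.5, 1), eps=0.01) keeps the extremes finite.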
as.numeric.index <- function(i, names) {
if (is.character(i)) {
return(match(i, names))
} else if (is.logical(i)) {
return(which(i))
} else if (is.numeric(i)) {
return(i)
} else {
stop(sprintf("Unknown index type: '%s'", class(i)))
}
}
## Return an index that selects all the elements that the given index
## i would NOT select. In other words, the complementary index of i
invert.index <- function(i, ...) {
-as.numeric.index(i, ...)
}
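## e.g. invert.index(2, names=c("a", "b", "c")) and
## invert.index("b", names=c("a", "b", "c")) both give -2, so
## x[invert.index("b", names(x))] drops element "b".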
make.centered.cell.proportion.design <- function(cellprop,
intcol=which.max(colMeans(cellprop)),
includeIntercept=FALSE) {
selected.cols <- invert.index(intcol, colnames(cellprop))
reduced <- cellprop[,selected.cols]
if (ncol(cellprop) - ncol(reduced) != 1) {
stop("Must select exactly one column as the intercept")
}
centered <- scale(reduced, center=TRUE, scale=FALSE)
if (includeIntercept) {
centered %<>% cbind(`(Intercept)`=1, .)
}
centered
}
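## Minimal sketch on a hypothetical 3-sample proportion matrix: the most
## abundant cell type is dropped as the implicit reference and the remaining
## columns are mean-centered, so they can be appended to a design matrix
## without aliasing the intercept.
if (FALSE) {
    cp <- matrix(c(0.6, 0.7, 0.5,
                   0.3, 0.2, 0.3,
                   0.1, 0.1, 0.2),
                 nrow=3,
                 dimnames=list(paste0("S", 1:3), c("CD4T", "CD8T", "Mono")))
    make.centered.cell.proportion.design(cp) ## centered CD8T and Mono columns
}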
clamp.values <- function(x, floor=0, ceiling=1, epsilon=0) {
floor <- floor + epsilon
ceiling <- ceiling - epsilon
stopifnot(floor <= ceiling)
x %>% pmax(floor) %>% pmin(ceiling)
}
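## e.g. clamp.values(c(-0.2, 0.5, 1.3)) gives 0, 0.5, 1; a nonzero epsilon
## keeps the clamped values strictly inside (0, 1) so that logit2() stays finite.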
gc.after <- function(expr) {
on.exit(gc())
expr
}
## Version of vooma with all the relevant features of voom
vooma2 <- function (y, design = NULL, normalize.method = "none",
plot = FALSE, span = NULL, ...)
{
out <- list()
ngenes <- nrow(y)
isExpressionSet <- suppressPackageStartupMessages(
is(y, "ExpressionSet"))
if (isExpressionSet) {
if (length(Biobase::fData(y)))
out$genes <- Biobase::fData(y)
if (length(Biobase::pData(y)))
out$targets <- Biobase::pData(y)
y <- Biobase::exprs(y)
}
else {
y <- as.matrix(y)
}
if (is.null(design)) {
design <- matrix(1, ncol(y), 1)
rownames(design) <- colnames(y)
colnames(design) <- "GrandMean"
}
y <- normalizeBetweenArrays(y, method = normalize.method)
fit <- lmFit(y, design, ...)
if (is.null(fit$Amean))
fit$Amean <- rowMeans(y, na.rm = TRUE)
sx <- rowMeans(y)
sy <- sqrt(fit$sigma)
if (is.null(span))
if (ngenes <= 10)
span <- 1
else span <- 0.3 + 0.7 * (10/ngenes)^0.5
l <- lowess(sx, sy, f = span)
if (plot) {
plot(sx, sy, xlab = "Average log2 expression", ylab = "Sqrt( standard deviation )",
pch = 16, cex = 0.25)
title("vooma: Mean-variance trend")
lines(l, col = "red")
}
f <- approxfun(l, rule = 2)
if (fit$rank < ncol(design)) {
j <- fit$pivot[1:fit$rank]
fitted.values <- fit$coef[, j, drop = FALSE] %*% t(fit$design[,
j, drop = FALSE])
}
else {
fitted.values <- fit$coef %*% t(fit$design)
}
w <- 1/f(fitted.values)^4
dim(w) <- dim(y)
colnames(w) <- colnames(y)
rownames(w) <- rownames(y)
out$E <- y
out$other <- list(fitted.values=fitted.values)
out$meanvar.trend <- data.frame(x = sx, y = sy)
out$weights <- w
out$design <- design
out$span <- span
new("EList", out)
}
voomaWithQualityWeights <-
function (y, design = NULL, normalize.method = "none",
plot = FALSE, span = NULL, var.design = NULL,
method = "genebygene", maxiter = 50, tol = 1e-10,
trace = FALSE, replace.weights = TRUE, col = NULL, ...)
{
if (plot) {
oldpar <- par(mfrow = c(1, 2))
on.exit(par(oldpar))
}
v <- vooma2(y, design = design, normalize.method = normalize.method,
plot = FALSE, span = span, ...)
aw <- arrayWeights(v, design = design, method = method, maxiter = maxiter,
tol = tol, var.design = var.design)
v <- vooma2(y, design = design, weights = aw,
normalize.method = normalize.method, plot = plot, span = span,
...)
aw <- arrayWeights(v, design = design, method = method, maxiter = maxiter,
tol = tol, trace = trace, var.design = var.design)
wts <- asMatrixWeights(aw, dim(v)) * v$weights
attr(wts, "arrayweights") <- NULL
if (plot) {
barplot(aw, names = 1:length(aw), main = "Sample-specific weights",
ylab = "Weight", xlab = "Sample", col = col)
abline(h = 1, col = 2, lty = 2)
}
if (replace.weights) {
v$weights <- wts
v$sample.weights <- aw
return(v)
}
else {
return(wts)
}
}
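## Hedged usage sketch (hypothetical objects): given a matrix `mat` of M-values
## (probes x samples) and a design matrix `des`, either call should return an
## EList with observation-level weights (plus sample-level weights for the
## second), analogous to limma's voom()/voomWithQualityWeights().
if (FALSE) {
    v  <- vooma2(mat, design=des, plot=TRUE)
    vw <- voomaWithQualityWeights(mat, design=des, plot=TRUE)
}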
timeTask("Loading data", {
RGSet <- readRDS("dataProcessQC/RGSet.RDS")
targets <- pData(RGSet) %>%
dplyr::select(
Sample_Name,
TStatus,
Diabetes,
Sex, Age,
Replicate=Replcate, ## (sic) the phenotype column really is named "Replcate"
Well=Sample_Well, Plate=Sample_Plate,
Array, Slide,
Basename) %>%
mutate(Basename=basename(Basename),
Sample_Name=as.character(Sample_Name),
TStatus=TStatus %>% factor %>%
relevel(ref="TX") %>%
revalue(c(`CAN/IFTA`="CAN")),
Sex=factor(Sex),
Diabetes=factor(Diabetes),
Well=factor(Well),
Plate=factor(Plate),
Array=factor(Array),
Slide=factor(Slide),
Replicate=factor(Replicate))
})
timeTask("Getting SNPs from RGSet", {
sample.snpbeta <- getSnpBeta(RGSet)
sample.snps <- (round(sample.snpbeta * 2)/2)
## Make dendrogram
snpdist.cor <- (1-abs(cor(t(sample.snps)))) %>% as.dist
snpdist.manh <- dist(sample.snps, method="manhattan")
snpclust.cor <- hclust(snpdist.cor)
snpclust.manh <- hclust(snpdist.manh)
snpdendro.cor <- snpclust.cor %>% as.dendrogram %>% hang.dendrogram
snpdendro.manh <- snpclust.manh %>% as.dendrogram %>% hang.dendrogram
dir.create("snpplots", FALSE, TRUE)
pdf("snpplots/snpdendro.pdf", height=12, width=8)
plot(snpdendro.manh, horiz = TRUE, main="SNP dendrogram (manhattan distance)")
plot(snpdendro.cor, horiz = TRUE, main="SNP dendrogram (dist=1-abs(corr))")
invisible(dev.off())
})
timeTask("Proprocessing RGSet to GRatioSet", {
GRatioSet <- RGSet %>% preprocessSWAN %>% mapToGenome %>% ratioConvert %>%
dropLociWithSnps(snps=c("SBE","CpG"), maf=0)
})
timeTask("Estimating cell type proportions", {
dir.create("plots", FALSE, TRUE)
pdf("plots/cellprop-qc.pdf", height=12, width=12)
RGTemp <- RGSet
pData(RGTemp)$Sample_Name %<>% as.character
pData(RGTemp)$Slide %<>% as.numeric
cellprop <- gc.after(estimateCellCounts(
RGTemp,
cellTypes = c("CD8T","CD4T", "NK","Bcell","Mono"),
meanPlot=TRUE)) %>%
pmax(0)
print(plotCellProportions(cellprop, TRUE) +
coord_flip())
print(ggbiplot(princomp(cellprop)) +
aes(color=targets$TStatus) +
scale_color_brewer(palette="Set2"))
dev.off()
rm(RGTemp)
})
{
tsmsg("Conducting ANOVA of cell proportions against covariates")
print(anova(lm(cellprop ~ TStatus + Diabetes + Sex + Age, data=targets)))
tsmsg("Conducting ANOVA of logit(cell proportions) against covariates")
print(anova(lm(reg.logit2(cellprop, 0.01) ~ TStatus + Diabetes + Sex + Age, data=targets)))
}
timeTask("Computing adjusted betas by fixed effects model", {
eps <- getBeta(GRatioSet) %>% {min(., 1-.)} / 2
assay(GRatioSet, "Beta.Adjusted.FE") <-
getBeta(GRatioSet) %>%
removeBatchEffect(covariates = make.centered.cell.proportion.design(cellprop)) %>%
clamp.values(floor=0, ceiling=1, epsilon=eps)
})
timeTask("Computing adjusted betas by mixed effects model", {
assay(GRatioSet, "Beta.Adjusted.LME") <-
adjust.beta(getBeta(GRatioSet), omega.mix=cellprop, mc.cores=8)
})
x <- cbind(Unadj=as.vector(assay(GRatioSet, "Beta")),
FE=as.vector(assay(GRatioSet, "Beta.Adjusted.FE")),
LME=as.vector(assay(GRatioSet, "Beta.Adjusted.LME")))
## Select SNP sets
timeTask("Removing known covariates from M values for SNP selection", {
dupfilter <- !duplicated(targets$Sample)
design.snp <- model.matrix(~Diabetes + Sex + Age, targets[dupfilter,])
preserve.cols <- 1
## Raw M-values
Mvals <- list(
Unadj=getM(GRatioSet)[,dupfilter],
FEAdj=logit2(assay(GRatioSet, "Beta.Adjusted.FE"))[,dupfilter],
LMEAdj=logit2(assay(GRatioSet, "Beta.Adjusted.LME"))[,dupfilter])
## M-values after regressing out the known covariates with removeBatchEffect
Mvals.bs <-
mclapply(Mvals,
. %>%
removeBatchEffect(
design=design.snp[,preserve.cols, drop=FALSE],
covariates=design.snp[,-preserve.cols, drop=FALSE]))
allMvals <- c(Mvals, Mvals.bs %>% setNames(str_c(names(.), ".bs")))
})
timeTask("Computing PCoA for SNP selection", {
dmats <- suppressPlot(mclapply(allMvals, . %>% plotMDS %$% distance.matrix %>% as.dist, mc.cores=4))
mds <- suppressWarnings(lapply(dmats, . %>% cmdscale(k=attr(., "Size") - 1, eig=TRUE)))
mdsdims <- lapply(mds, . %$% points %>% add.numbered.colnames("PC"))
})
timeTask("Performing logistic regression of SNPs against PCs", {
snps.rmdup <- sample.snps[,dupfilter]
ndims <- 10
snpframes <- lapply(mdsdims, function(m) {
apply(snps.rmdup, 1, function(snp) {
data.frame(snp=factor(snp, ordered=TRUE), m[,seq_len(ndims), drop=FALSE])
})
})
snp.pvals <- sapply(snpframes, function(dflist) {
lapply(dflist, function(df) {
depvar <- colnames(df)[1]
covars <- colnames(df)[-1]
altformula <- as.formula(str_c(depvar, " ~ ", str_c(covars, collapse="+")))
nullformula <- as.formula(str_c(depvar, " ~ 1"))
## Use penalized regression to deal with perfect separation problem
lrm(altformula, data=df, penalty=1) %>% anova %>% .["TOTAL", "P"]
## fit1 <- polr(altformula, data=df, Hess=TRUE)
## fit0 <- polr(nullformula, data=df, Hess=TRUE)
## atest <- anova(fit1, fit0)
## atest$`Pr(Chi)`[2]
}) %>% unlist
})
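## Illustrative sketch of why the ridge penalty matters (toy data, not part of
## the pipeline): with perfectly separated groups an unpenalized logistic fit
## diverges, whereas rms::lrm with a small penalty returns finite coefficients.
if (FALSE) {
    toy <- data.frame(y=factor(rep(0:1, each=10), ordered=TRUE),
                      x=c(rnorm(10, -3), rnorm(10, 3)))
    ## glm(y ~ x, family=binomial, data=toy) # warns: fitted probabilities of 0 or 1
    lrm(y ~ x, data=toy, penalty=1) # penalized fit stays finite
}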
snp.fdr <- apply(snp.pvals, 2, p.adjust, method="BH")
snp.ranks <- apply(snp.pvals, 2, function(p) {seq_len(length(p))[order(order(p))]}) %>%
set_rownames(rownames(snp.pvals))
## Plot SNP dendrograms with ranks for each version
pdf("snpplots/snpdendro-ranked.pdf", height=14, width=10)
for (i in colnames(snp.pvals)) local({
title <- sprintf("SNP Dendrogram (ranked by logit reg vs %s data)", i)
tempclust <- snpclust.manh
snplabels <- sprintf("%s: %s, FDR = %0.3g",
snp.ranks[tempclust$labels,i],
tempclust$labels,
snp.fdr[tempclust$labels,i])
tempclust$labels <- snplabels
tempdendro <- tempclust %>% as.dendrogram %>% hang.dendrogram
plot(tempdendro, horiz = TRUE, main=title)
})
dev.off()
## Make SNP MDS plots
for (i in colnames(snp.pvals)) local({
o <- order(snp.ranks[,i])
snpnames <- rownames(snp.pvals)[o]
snpfdrs <- snp.fdr[o,i]
snpranks <- snp.ranks[o,i]
pdf(sprintf("snpplots/SNP_MDS_%s.pdf", i))
for (s in seq_len(length(snpnames))) {
frame <- snpframes[[i]][[snpnames[s]]] %>% cbind(targets[dupfilter,]) %>%
mutate(Sex=factor(Sex))
frame$SNP <- frame$snp %>% as.character %>% as.numeric
title <- sprintf("MDS plot colored by SNP %s; Rank %s, FDR = %0.3g",
snpnames[s], snpranks[s], snpfdrs[s])
print(ggplot(frame) +
aes(x=PC1, y=PC2, color=SNP, shape=TStatus:Sex) +
geom_point(size=5) +
scale_color_gradient2(midpoint = 0.5, mid="gray40") +
scale_shape_manual(
values=c(16, 1,
15, 0,
17, 2,
18, 5)) +
coord_fixed() +
ggtitle(title))
}
dev.off()
})
})
## Determined by looking at the above MDS plots and dendrogram
## (Theoretically I should revisit them after switching to penalized
## regression, but it seems the SNP idea is garbage anyway, so never
## mind.)
snpsets <- list(
origsnps=c(
SNP1="rs6426327",
SNP2="rs472920",
SNP3="rs2468330",
SNP4="rs133860",
SNP5="rs1467387"),
batchsub=c(
SNP1="rs6426327",
SNP2="rs472920" ,
SNP4="rs5987737",
SNP7="rs133860",
SNP9="rs1484127"),
adj.batchsub=c(
SNP1="rs10033147",
SNP2="rs9292570",
SNP3="rs2804694",
SNP6="rs1484127",
SNP7="rs1941955"))
save.image("saved.RData")
run.variant <- function(adjustForCellProportions=c("none", "adj.beta.fe", "adj.beta.lme", "adj.M"),
regressionScale=c("M", "beta"),
handleDuplicates=c("dupcor", "remove", "none"),
snps=NULL,
handleVarTrend=c("vooma", "trend", "none"),
handleHiddenVar=c("sva", "peer", "none"),
useArrayWeights=FALSE,
randomSeed) {
adjustForCellProportions <- match.arg(adjustForCellProportions)
regressionScale <- match.arg(regressionScale)
handleVarTrend <- match.arg(handleVarTrend)
handleDuplicates <- match.arg(handleDuplicates)
handleHiddenVar <- match.arg(handleHiddenVar)
selected.options <-
mget(c("adjustForCellProportions", "regressionScale", "handleDuplicates",
"snps", "handleVarTrend", "handleHiddenVar", "useArrayWeights",
"randomSeed"))
if (regressionScale == "beta" && adjustForCellProportions == "adj.M") {
stop("M-value adjustment makes no sense when performing analysis on beta values.")
}
vars.to.return <- character(0)
add.return.var <- function(...) {
vars.to.return <<- c(vars.to.return, ...)
}
## Return all the inputs
add.return.var(
"adjustForCellProportions", "regressionScale", "handleDuplicates",
"snps", "handleVarTrend", "handleHiddenVar", "useArrayWeights",
"randomSeed")
## Allow reproducibility
if (!missing(randomSeed) && !is.null(randomSeed)) {
set.seed(randomSeed)
add.return.var("randomSeed")
tsmsg("Setting random seed to ", deparse(randomSeed))
}
## Cell proportion adjustment
if (adjustForCellProportions == "adj.beta.lme") {
tsmsg("Using betas adjusted for cell proportions via linear mixed effects model")
cellprop <- cellprop
add.return.var("cellprop")
## It's already pre-computed
beta <- assay(GRatioSet, "Beta.Adjusted.LME")
} else if (adjustForCellProportions == "adj.beta.fe" && regressionScale == "M") {
tsmsg("Using betas adjusted for cell proportions via ordinary (fixed effect) linear model")
cellprop <- cellprop
add.return.var("cellprop")
## It's already pre-computed
beta <- assay(GRatioSet, "Beta.Adjusted.FE")
} else if (adjustForCellProportions == "adj.beta.fe" && regressionScale == "beta") {
tsmsg("Using unadjusted betas, since cell-type adjustment will happen during model fitting")
beta <- getBeta(GRatioSet)
} else {
tsmsg("Using unadjusted betas")
beta <- getBeta(GRatioSet)
}
add.return.var("beta")
## Make a local copy of targets
targets <- targets
add.return.var("targets")
## Dup removal
if (handleDuplicates == "remove") {
tsmsg("Removing duplicates")
dupfilter <- !duplicated(targets$Sample)
targets <- targets[dupfilter,]
beta <- beta[,dupfilter]
}
if (regressionScale == "M") {
timeTask("Transforming betas to M-values for regression", {
elist <- new("EList", list(E = logit2(beta)))
})
} else {
tsmsg("Performing regression on beta values")
elist <- new("EList", list(E = beta))
}
add.return.var("elist")
## Base designs
design1 <- model.matrix(~TStatus + Diabetes + Sex + Age, targets)
design0 <- model.matrix(~Diabetes + Sex + Age, targets)
add.return.var("design1", "design0")
## SNP
if (length(snps) == 0) {
snps <- NULL
}
if (is.vector(snps)) {
snps <- sample.snps[snps,]
add.return.var("snps")
}
if (is.matrix(snps)) {
if (handleDuplicates == "remove") {
snps <- snps[, dupfilter] ## samples are columns of the SNP matrix
}
tsmsg("Adding selected SNPs to design matrix: ", deparse(rownames(snps)))
design1 %<>% cbind(t(snps))
design0 %<>% cbind(t(snps))
} else if (!is.null(snps)) {
stop("Invalid snps argument")
}
## cell proportions as covariates
if (adjustForCellProportions == "adj.M" ||
(adjustForCellProportions == "adj.beta.fe" && regressionScale == "beta")) {
## Make a local copy
cellprop <- cellprop
add.return.var("cellprop")
## Normalize row sums to exactly one, since
## estimateCellCounts doesn't guarantee this.
cellprop %<>% divide_by(rowSums(.))
## TODO make this threshold a parameter
present.celltypes <- colMaxs(cellprop) >= 0.05
if (any(!present.celltypes)) {
cellprop <- cellprop[,present.celltypes]
cellprop %<>% divide_by(rowSums(.))
}
if (ncol(cellprop) > 1) {
tsmsg("Adding cell type proportions as covariates in design matrix")
## Drop the most abundant cell type (the implicit reference) and mean-center
## the remaining proportions so they do not alias the intercept
cellpropdesign <- make.centered.cell.proportion.design(cellprop, includeIntercept = FALSE)
design1 %<>% cbind(cellpropdesign)
design0 %<>% cbind(cellpropdesign)
} else {
tsmsg("Not adding cell type proportions as covariates in design matrix because only one cell type is present above the threshold")
}
}
## sva/PEER
svobj <- NULL
switch(handleHiddenVar,
sva=timeTask("Running sva", {
svobj <- sva(as.matrix(elist), design1, design0)
svobj$sv %<>% set_colnames(sprintf("SV%i", seq(ncol(.))))
tsmsg(sprintf("Got %i significant surrogate variables", ncol(svobj$sv)))
design1 %<>% cbind(svobj$sv)
design0 %<>% cbind(svobj$sv)
add.return.var("svobj")
}),
peer=timeTask("Running PEER", {
stop("Unimplemented")
}),
none={
tsmsg("Not adjusting for hidden variables")
})
## arrayWeights and/or vooma
if (useArrayWeights) {
if (handleVarTrend == "vooma") {
timeTask("Computing sample and observation weights", {
elist <- voomaWithQualityWeights(elist, design1)
})
} else {
timeTask("Computing sample weights", {
elist$sample.weights <- arrayWeights(elist, design1)
elist$weights <- elist$sample.weights %>% asMatrixWeights(dim(elist))
})
}
}
else {
if (handleVarTrend == "vooma") {
timeTask("Computing vooma observation weights",{
elist <- vooma2(elist, design1)
})
} else {
tsmsg("Not using sample weights")
}
}
## dupCor
if (handleDuplicates == "dupcor") {
timeTask("Running duplicateCorrelation", {
corfit <- suppressWarnings(duplicateCorrelation(elist, design1, block=targets$Sample))
tsmsg("Got consensus correlation of ", corfit$consensus.correlation)
})
add.return.var("corfit")
## Need to re-run vooma with correlation and block
if (handleVarTrend == "vooma") {
if (useArrayWeights) {
timeTask("Re-computing sample and observation weights after duplicateCorrelation", {
elist <- voomaWithQualityWeights(
elist, design1,
correlation = corfit$consensus.correlation,
block=targets$Sample)
})
} else {
timeTask("Re-computing vooma observation weights after duplicateCorrelation", {
elist <- vooma2(
elist, design1,
correlation = corfit$consensus.correlation,
block=targets$Sample)
})
}
}
}
fit.args <- list(object=elist, design=design1)
if (handleDuplicates == "dupcor") {
fit.args$correlation <- corfit$consensus.correlation
fit.args$block <- targets$Sample
}
timeTask("Fitting linear model", {
fit <- do.call(lmFit, fit.args) %>% eBayes(robust=TRUE, trend=handleVarTrend == "trend")
})
add.return.var("fit")
coef.tests <- list(ARvsTX="TStatusAR", ADNRvsTX="TStatusADNR", CANvsTX="TStatusCAN",
DiabetesT2vsT1="DiabetesT2D", MvsF="SexM", Age="Age")
if (!is.null(snps)) {
coef.tests$SNPs <- rownames(snps)
}
if (adjustForCellProportions == "adj.M" ||
(adjustForCellProportions == "adj.beta.fe" && regressionScale == "beta")) {
coef.tests$CellType <- colnames(cellpropdesign)
}
if (!is.null(svobj)) {
coef.tests$SV <- colnames(svobj$sv)
}
add.return.var("coef.tests")
## TODO PEER
timeTask("Generating results tables", {
results <- lapply(coef.tests, . %>% topTable(fit=fit, coef=., n=Inf))
})
add.return.var("results")
## Collect a summary of all relevant results
results.summary <- list()
add.return.var("results.summary")
results.summary$SignifCounts <-
data.frame(
ESTDM=results %>%
sapply(. %$% P.Value %>% propTrueNull %>% {1 - .} %>%
multiply_by(nrow(results[[1]]))) %>%
round,
FDR10=results %>%
sapply(. %$% adj.P.Val %>% is_weakly_less_than(0.1) %>% sum),
FDR15=results %>%
sapply(. %$% adj.P.Val %>% is_weakly_less_than(0.15) %>% sum))
results.summary$ModelSummary <- list(
DesignDF=ncol(fit$design),
ResidDF=max(fit$df.residual),
PriorDF=fit$df.prior %>% { if (length(.) <= 1) . else summary(.) },
ModelVariance=summary(fit$s2.post))
if (handleDuplicates == "dupcor") {
results.summary$DuplicateCorrelation <- corfit$consensus.correlation
}
if (handleHiddenVar == "sva") {
results.summary$NumSV <- ncol(svobj$sv)
}
if (useArrayWeights) {
results.summary$ArrayWeights <- summary(elist$sample.weights)
}
results.summary$Options <- selected.options
## Save sessionInfo in results
session.info <- sessionInfo()
add.return.var("session.info")
return(mget(vars.to.return))
}
arglists <- list(
## proto=list(
## adjustForCellProportions=c("none", "adj.beta", "adj.beta.lme", "adj.M"),
## regressionScale=c("M", "beta"),
## handleDuplicates=c("dupcor", "remove", "none"),
## snps=NULL,
## handleVarTrend=c("vooma", "trend", "none"),
## handleHiddenVar=c("sva", "peer", "none"),
## useArrayWeights=FALSE,
## randomSeed),
unadj.naive=list(
adjustForCellProportions="none",
regressionScale="M",
handleDuplicates="none",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="none",
useArrayWeights=FALSE,
randomSeed=1986),
unadj.rmdups=list(
adjustForCellProportions="none",
regressionScale="M",
handleDuplicates="remove",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="none",
useArrayWeights=FALSE,
randomSeed=1986),
unadj.dupcor=list(
adjustForCellProportions="none",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="none",
useArrayWeights=FALSE,
randomSeed=1986),
unadj.dupcor.sva=list(
adjustForCellProportions="none",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="sva",
useArrayWeights=FALSE,
randomSeed=1986),
unadj.dupcor.sva.aw=list(
adjustForCellProportions="none",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986),
unadj.dupcor.sva.vooma=list(
adjustForCellProportions="none",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=FALSE,
randomSeed=1986),
unadj.dupcor.snps.sva.voomaw=list(
adjustForCellProportions="none",
regressionScale="M",
handleDuplicates="dupcor",
snps=snpsets$batchsub,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986),
unadj.dupcor.sva.voomaw=list(
adjustForCellProportions="none",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986),
adjBetaLME.dupcor=list(
adjustForCellProportions="adj.beta.lme",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="none",
useArrayWeights=FALSE,
randomSeed=1986),
adjBetaLME.dupcor.sva=list(
adjustForCellProportions="adj.beta.lme",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="sva",
useArrayWeights=FALSE,
randomSeed=1986),
adjBetaLME.dupcor.sva.aw=list(
adjustForCellProportions="adj.beta.lme",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986),
adjBetaLME.dupcor.sva.vooma=list(
adjustForCellProportions="adj.beta.lme",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=FALSE,
randomSeed=1986),
adjBetaLME.dupcor.sva.voomaw=list(
adjustForCellProportions="adj.beta.lme",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986),
adjM.dupcor=list(
adjustForCellProportions="adj.M",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="none",
useArrayWeights=FALSE,
randomSeed=1986),
adjM.dupcor.snps=list(
adjustForCellProportions="adj.M",
regressionScale="M",
handleDuplicates="dupcor",
snps=snpsets$batchsub,
handleVarTrend="trend",
handleHiddenVar="none",
useArrayWeights=FALSE,
randomSeed=1986),
adjM.dupcor.sva=list(
adjustForCellProportions="adj.M",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="sva",
useArrayWeights=FALSE,
randomSeed=1986),
adjM.dupcor.sva.aw=list(
adjustForCellProportions="adj.M",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="trend",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986),
adjM.dupcor.sva.vooma=list(
adjustForCellProportions="adj.M",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=FALSE,
randomSeed=1986),
adjM.dupcor.sva.voomaw=list(
adjustForCellProportions="adj.M",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986),
adjBetaFE.dupcor.voomaw=list(
adjustForCellProportions="adj.beta.fe",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="none",
useArrayWeights=TRUE,
randomSeed=1986),
adjBetaFE.dupcor.sva.voomaw=list(
adjustForCellProportions="adj.beta.fe",
regressionScale="M",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986),
betascale.dupcor.sva.voomaw=list(
adjustForCellProportions="none",
regressionScale="beta",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986),
betascale.dupcor.cellprop.voomaw=list(
adjustForCellProportions="adj.beta.fe",
regressionScale="beta",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="none",
useArrayWeights=TRUE,
randomSeed=1986),
betascale.dupcor.cellprop.sva.voomaw=list(
adjustForCellProportions="adj.beta.fe",
regressionScale="beta",
handleDuplicates="dupcor",
snps=NULL,
handleVarTrend="vooma",
handleHiddenVar="sva",
useArrayWeights=TRUE,
randomSeed=1986))
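## A single configuration can also be run by hand before (or instead of) the
## loop below, e.g.:
## one <- do.call(run.variant, arglists$unadj.dupcor.sva)
## print(one$results.summary)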
variants.to.run <- names(arglists)
dir.create("analyses", FALSE, TRUE)
for (i in variants.to.run) local({
on.exit(gc())
save.file <- file.path("analyses", sprintf("analysis-%s.RDS", i))
summary.file <- file.path("analyses", sprintf("summary-%s.RDS", i))
error.file <- file.path("analyses", sprintf("error-%s.RDS", i))
if (file.exists(save.file)) {
tsmsg("Skipping already done analysis ", i)
tsmsg("Printing previous results for this analysis:")
print(readRDS(summary.file))
} else {
timeTask(str_c("Running full pipeline for ", i), {
result <- try_or_err({
analysis <- do.call(run.variant, arglists[[i]])
timeTask("Saving results", {
saveRDS(analysis, save.file)
saveRDS(analysis$results.summary, summary.file)
})
tsmsg(sprintf("Summary for %s:", i))
print(analysis$results.summary)
})
if (is(result, "error")) {
tsmsg("ERROR: Failed during ", i, " analysis. Saving error (including stacktrace) to ", deparse(error.file), ". Also printing stack trace:")
print(result)
print(result$calls)
saveRDS(result, error.file)
}
})
}
})