Last active
September 3, 2024 11:49
-
-
Save hermidalc/acd818c752ab9a9a4137f6dd5e21bb1d to your computer and use it in GitHub Desktop.
RNA-seq normalization, differential expression, transformation, volcano plotting with DESeq2, edgeR, limma-voom
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
suppressPackageStartupMessages({ | |
library(Biobase) | |
library(DESeq2) | |
library(edgeR) | |
library(EnhancedVolcano) | |
library(limma) | |
}) | |
fc <- 1.0 | |
lfc <- log2(fc) | |
padj <- 0.1 | |
# Benjamini-Hochberg FDR multiple testing correction (standard) | |
padj_meth <- "BH" | |
fig_dim <- 8 | |
fig_res <- 300 | |
xlim <- c(-4, 4) | |
ylim <- c(0, 4) | |
# data types to have ready or read in from csv/tsv files | |
# counts (feature x sample data matrix of integer counts) | |
# pdata (sample x phenotypic data dataframe) | |
# fdata (feature x feature annotation dataframe) | |
# Hugo et al. pretreatment anti-PD1 response dataset example | |
eset <- readRDS("srp070710_pre_ensg_htseq_counts.rds") | |
counts <- exprs(eset) | |
pdata <- pData(eset) | |
fdata <- fData(eset) | |
pdata$Group <- pdata$Class | |
# if you have batches and you want to model them | |
# pdata$Batch <- factor(pdata$Batch) | |
# pdata column with differential compararison of interest | |
# (e.g. subtype, metastatic) | |
pdata$Group <- factor(pdata$Group) | |
# if you have batches you want to include in model | |
# formula <- ~Batch + Group | |
formula <- ~Group | |
design <- model.matrix(formula, data=pdata) | |
#################################### DESeq2 #################################### | |
# setup and filtering | |
dds <- DESeqDataSetFromMatrix(counts, pdata, formula) | |
mcols(dds) <- DataFrame(mcols(dds), fdata) | |
dds <- dds[filterByExpr(counts, design), ] | |
# normalization and differential expression | |
dds <- DESeq(dds, quiet=TRUE) | |
results <- as.data.frame(lfcShrink( | |
dds, coef=length(resultsNames(dds)), type="apeglm", lfcThreshold=lfc, | |
quiet=TRUE | |
)) | |
# alternate older DESeq2 DE test (often useful) | |
# alpha <- ifelse(padj == 1, 0.99999, padj) | |
# results <- as.data.frame(results( | |
# dds, name=tail(resultsNames(dds), n=1), alpha=alpha, lfcThreshold=lfc, | |
# altHypothesis="greaterAbs", pAdjustMethod=padj_meth | |
# )) | |
# with DESeq2 you have to add fdata annotation columns to results manually | |
# for example to add fdata column "Symbol" | |
# always add columns before sorting below | |
if ("Symbol" %in% colnames(fdata)) { | |
results$Symbol <- mcols(dds)$Symbol | |
results <- results[, c( | |
"Symbol", colnames(results)[colnames(results) != "Symbol"] | |
)] | |
} | |
# lfcShrink apeglm s-values equivalent to adjusted p-values (or q-values) | |
if (lfc > 0 && ("svalue" %in% colnames(results))) { | |
results <- results[order(results$svalue), ] | |
} else { | |
results <- results[order(results$pvalue), ] | |
} | |
write.table( | |
data.frame("ID"=row.names(results), results), file="deseq2_results.tsv", | |
sep="\t", quote=FALSE, row.names=FALSE | |
) | |
# transform normalized data for downstream analysis | |
vst <- varianceStabilizingTransformation(dds, blind=FALSE) | |
vsd <- assay(vst) | |
# volcano plot | |
if ("Symbol" %in% colnames(results)) { | |
labels <- results$Symbol | |
} else { | |
labels <- row.names(results) | |
} | |
png( | |
file="deseq2_volcano.png", width=fig_dim, height=fig_dim, units="in", | |
res=fig_res | |
) | |
EnhancedVolcano( | |
results, | |
lab=labels, | |
x="log2FoldChange", | |
y=ifelse(lfc > 0 && ("svalue" %in% colnames(results)), "svalue", "padj"), | |
ylab=bquote(~-Log[10]~adjusted~italic(P)), | |
# xlim=xlim, | |
# ylim=ylim, | |
pCutoff=padj, | |
FCcutoff=lfc, | |
pointSize=3.0, | |
labSize=3.0, | |
drawConnectors=FALSE, | |
colConnectors="grey50", | |
title="Differential expression", | |
subtitle=paste( | |
"DESeq2: RLE normalization, Wald test", | |
ifelse(lfc > 0, ", lfcThreshold", "") | |
), | |
caption=paste("FC cutoff = ", fc, "; FDR cutoff = ", padj, sep="") | |
) | |
invisible(dev.off()) | |
##################################### edgeR #################################### | |
# setup and filtering | |
dge <- DGEList(counts=counts, genes=fdata) | |
dge <- dge[filterByExpr(dge, design), , keep.lib.sizes=FALSE] | |
# normalization and differential expression | |
dge <- calcNormFactors(dge, method="TMM") | |
dge <- estimateDisp(dge, design, robust=TRUE) | |
fit <- glmQLFit(dge, design, robust=TRUE) | |
if (lfc > 0) { | |
glt <- glmTreat(fit, coef=ncol(design), lfc=lfc) | |
} else { | |
glt <- glmQLFTest(fit, coef=ncol(design)) | |
} | |
results <- as.data.frame( | |
topTags(glt, n=Inf, adjust.method=padj_meth, sort.by="PValue") | |
) | |
write.table( | |
data.frame("ID"=row.names(results), results), file="edger_results.tsv", | |
sep="\t", quote=FALSE, row.names=FALSE | |
) | |
# transform normalized data for downstream analysis | |
cpms <- cpm(dge, log=TRUE) | |
# volcano plot | |
if ("Symbol" %in% colnames(results)) { | |
labels <- results$Symbol | |
} else { | |
labels <- row.names(results) | |
} | |
png( | |
file="edger_volcano.png", width=fig_dim, height=fig_dim, units="in", | |
res=fig_res | |
) | |
EnhancedVolcano( | |
results, | |
lab=labels, | |
x="logFC", | |
y="FDR", | |
ylab=bquote(~-Log[10]~adjusted~italic(P)), | |
# xlim=xlim, | |
# ylim=ylim, | |
pCutoff=padj, | |
FCcutoff=lfc, | |
pointSize=3.0, | |
labSize=3.0, | |
drawConnectors=FALSE, | |
colConnectors="grey50", | |
title="Differential expression", | |
subtitle=paste( | |
"edgeR: TMM normalization, QLF test", ifelse(lfc > 0, ", TREAT", "") | |
), | |
caption=paste("FC cutoff = ", fc, "; FDR cutoff = ", padj, sep="") | |
) | |
invisible(dev.off()) | |
################################## limma-voom ################################## | |
# setup and filtering | |
dge <- DGEList(counts=counts, genes=fdata) | |
dge <- dge[filterByExpr(dge, design), , keep.lib.sizes=FALSE] | |
# normalization and differential expression | |
dge <- calcNormFactors(dge, method="TMM") | |
v <- voom(dge, design) | |
fit <- lmFit(v, design) | |
if (lfc > 0) { | |
fit <- treat(fit, lfc=lfc, robust=TRUE) | |
results <- topTreat( | |
fit, coef=ncol(design), number=Inf, adjust.method=padj_meth, sort.by="P" | |
) | |
} else { | |
fit <- eBayes(fit, robust=TRUE) | |
results <- topTable( | |
fit, coef=ncol(design), number=Inf, adjust.method=padj_meth, sort.by="P" | |
) | |
} | |
write.table( | |
data.frame("ID"=row.names(results), results), file="voom_results.tsv", | |
sep="\t", quote=FALSE, row.names=FALSE | |
) | |
# transform normalized data for downstream analysis | |
cpms <- cpm(dge, log=TRUE) | |
# volcano plot | |
if ("Symbol" %in% colnames(results)) { | |
labels <- results$Symbol | |
} else { | |
labels <- row.names(results) | |
} | |
png( | |
file="voom_volcano.png", width=fig_dim, height=fig_dim, units="in", | |
res=fig_res | |
) | |
EnhancedVolcano( | |
results, | |
lab=labels, | |
x="logFC", | |
y="adj.P.Val", | |
ylab=bquote(~-Log[10]~adjusted~italic(P)), | |
# xlim=xlim, | |
# ylim=ylim, | |
pCutoff=padj, | |
FCcutoff=lfc, | |
pointSize=3.0, | |
labSize=3.0, | |
drawConnectors=FALSE, | |
colConnectors="grey50", | |
title="Differential expression", | |
subtitle=paste( | |
"limma-voom: TMM normalization", ifelse(lfc > 0, ", TREAT", "") | |
), | |
caption=paste("FC cutoff = ", fc, "; FDR cutoff = ", padj, sep="") | |
) | |
invisible(dev.off()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment