Skip to content

Instantly share code, notes, and snippets.

@hermidalc
Last active September 3, 2024 11:49
Show Gist options
  • Save hermidalc/acd818c752ab9a9a4137f6dd5e21bb1d to your computer and use it in GitHub Desktop.
Save hermidalc/acd818c752ab9a9a4137f6dd5e21bb1d to your computer and use it in GitHub Desktop.
RNA-seq normalization, differential expression, transformation, volcano plotting with DESeq2, edgeR, limma-voom
suppressPackageStartupMessages({
library(Biobase)
library(DESeq2)
library(edgeR)
library(EnhancedVolcano)
library(limma)
})
fc <- 1.0
lfc <- log2(fc)
padj <- 0.1
# Benjamini-Hochberg FDR multiple testing correction (standard)
padj_meth <- "BH"
fig_dim <- 8
fig_res <- 300
xlim <- c(-4, 4)
ylim <- c(0, 4)
# data types to have ready or read in from csv/tsv files
# counts (feature x sample data matrix of integer counts)
# pdata (sample x phenotypic data dataframe)
# fdata (feature x feature annotation dataframe)
# Hugo et al. pretreatment anti-PD1 response dataset example
eset <- readRDS("srp070710_pre_ensg_htseq_counts.rds")
counts <- exprs(eset)
pdata <- pData(eset)
fdata <- fData(eset)
pdata$Group <- pdata$Class
# if you have batches and you want to model them
# pdata$Batch <- factor(pdata$Batch)
# pdata column with differential compararison of interest
# (e.g. subtype, metastatic)
pdata$Group <- factor(pdata$Group)
# if you have batches you want to include in model
# formula <- ~Batch + Group
formula <- ~Group
design <- model.matrix(formula, data=pdata)
#################################### DESeq2 ####################################
# setup and filtering
dds <- DESeqDataSetFromMatrix(counts, pdata, formula)
mcols(dds) <- DataFrame(mcols(dds), fdata)
dds <- dds[filterByExpr(counts, design), ]
# normalization and differential expression
dds <- DESeq(dds, quiet=TRUE)
results <- as.data.frame(lfcShrink(
dds, coef=length(resultsNames(dds)), type="apeglm", lfcThreshold=lfc,
quiet=TRUE
))
# alternate older DESeq2 DE test (often useful)
# alpha <- ifelse(padj == 1, 0.99999, padj)
# results <- as.data.frame(results(
# dds, name=tail(resultsNames(dds), n=1), alpha=alpha, lfcThreshold=lfc,
# altHypothesis="greaterAbs", pAdjustMethod=padj_meth
# ))
# with DESeq2 you have to add fdata annotation columns to results manually
# for example to add fdata column "Symbol"
# always add columns before sorting below
if ("Symbol" %in% colnames(fdata)) {
results$Symbol <- mcols(dds)$Symbol
results <- results[, c(
"Symbol", colnames(results)[colnames(results) != "Symbol"]
)]
}
# lfcShrink apeglm s-values equivalent to adjusted p-values (or q-values)
if (lfc > 0 && ("svalue" %in% colnames(results))) {
results <- results[order(results$svalue), ]
} else {
results <- results[order(results$pvalue), ]
}
write.table(
data.frame("ID"=row.names(results), results), file="deseq2_results.tsv",
sep="\t", quote=FALSE, row.names=FALSE
)
# transform normalized data for downstream analysis
vst <- varianceStabilizingTransformation(dds, blind=FALSE)
vsd <- assay(vst)
# volcano plot
if ("Symbol" %in% colnames(results)) {
labels <- results$Symbol
} else {
labels <- row.names(results)
}
png(
file="deseq2_volcano.png", width=fig_dim, height=fig_dim, units="in",
res=fig_res
)
EnhancedVolcano(
results,
lab=labels,
x="log2FoldChange",
y=ifelse(lfc > 0 && ("svalue" %in% colnames(results)), "svalue", "padj"),
ylab=bquote(~-Log[10]~adjusted~italic(P)),
# xlim=xlim,
# ylim=ylim,
pCutoff=padj,
FCcutoff=lfc,
pointSize=3.0,
labSize=3.0,
drawConnectors=FALSE,
colConnectors="grey50",
title="Differential expression",
subtitle=paste(
"DESeq2: RLE normalization, Wald test",
ifelse(lfc > 0, ", lfcThreshold", "")
),
caption=paste("FC cutoff = ", fc, "; FDR cutoff = ", padj, sep="")
)
invisible(dev.off())
##################################### edgeR ####################################
# setup and filtering
dge <- DGEList(counts=counts, genes=fdata)
dge <- dge[filterByExpr(dge, design), , keep.lib.sizes=FALSE]
# normalization and differential expression
dge <- calcNormFactors(dge, method="TMM")
dge <- estimateDisp(dge, design, robust=TRUE)
fit <- glmQLFit(dge, design, robust=TRUE)
if (lfc > 0) {
glt <- glmTreat(fit, coef=ncol(design), lfc=lfc)
} else {
glt <- glmQLFTest(fit, coef=ncol(design))
}
results <- as.data.frame(
topTags(glt, n=Inf, adjust.method=padj_meth, sort.by="PValue")
)
write.table(
data.frame("ID"=row.names(results), results), file="edger_results.tsv",
sep="\t", quote=FALSE, row.names=FALSE
)
# transform normalized data for downstream analysis
cpms <- cpm(dge, log=TRUE)
# volcano plot
if ("Symbol" %in% colnames(results)) {
labels <- results$Symbol
} else {
labels <- row.names(results)
}
png(
file="edger_volcano.png", width=fig_dim, height=fig_dim, units="in",
res=fig_res
)
EnhancedVolcano(
results,
lab=labels,
x="logFC",
y="FDR",
ylab=bquote(~-Log[10]~adjusted~italic(P)),
# xlim=xlim,
# ylim=ylim,
pCutoff=padj,
FCcutoff=lfc,
pointSize=3.0,
labSize=3.0,
drawConnectors=FALSE,
colConnectors="grey50",
title="Differential expression",
subtitle=paste(
"edgeR: TMM normalization, QLF test", ifelse(lfc > 0, ", TREAT", "")
),
caption=paste("FC cutoff = ", fc, "; FDR cutoff = ", padj, sep="")
)
invisible(dev.off())
################################## limma-voom ##################################
# setup and filtering
dge <- DGEList(counts=counts, genes=fdata)
dge <- dge[filterByExpr(dge, design), , keep.lib.sizes=FALSE]
# normalization and differential expression
dge <- calcNormFactors(dge, method="TMM")
v <- voom(dge, design)
fit <- lmFit(v, design)
if (lfc > 0) {
fit <- treat(fit, lfc=lfc, robust=TRUE)
results <- topTreat(
fit, coef=ncol(design), number=Inf, adjust.method=padj_meth, sort.by="P"
)
} else {
fit <- eBayes(fit, robust=TRUE)
results <- topTable(
fit, coef=ncol(design), number=Inf, adjust.method=padj_meth, sort.by="P"
)
}
write.table(
data.frame("ID"=row.names(results), results), file="voom_results.tsv",
sep="\t", quote=FALSE, row.names=FALSE
)
# transform normalized data for downstream analysis
cpms <- cpm(dge, log=TRUE)
# volcano plot
if ("Symbol" %in% colnames(results)) {
labels <- results$Symbol
} else {
labels <- row.names(results)
}
png(
file="voom_volcano.png", width=fig_dim, height=fig_dim, units="in",
res=fig_res
)
EnhancedVolcano(
results,
lab=labels,
x="logFC",
y="adj.P.Val",
ylab=bquote(~-Log[10]~adjusted~italic(P)),
# xlim=xlim,
# ylim=ylim,
pCutoff=padj,
FCcutoff=lfc,
pointSize=3.0,
labSize=3.0,
drawConnectors=FALSE,
colConnectors="grey50",
title="Differential expression",
subtitle=paste(
"limma-voom: TMM normalization", ifelse(lfc > 0, ", TREAT", "")
),
caption=paste("FC cutoff = ", fc, "; FDR cutoff = ", padj, sep="")
)
invisible(dev.off())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment