Skip to content

Instantly share code, notes, and snippets.

@ohofmann
Created April 21, 2012 01:00
Show Gist options
  • Save ohofmann/2433014 to your computer and use it in GitHub Desktop.
Save ohofmann/2433014 to your computer and use it in GitHub Desktop.
library(ReadqPCR)
library(NormqPCR)
library(pheatmap)
library(RColorBrewer)
library(ggplot2)
######################################################################
# Setup
######################################################################
# Pairwise correlations
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y, use="pairwise"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.6/strwidth(txt)
text(0.5, 0.5, txt, cex=cex )
}
######################################################################
# Parse data
######################################################################
#
# We have technical and biological replicates, well information
# is available. Had to drop NCT reads as they did not have the
# same number of technical replicates. Odd limitation.
pcrData <- file.path('.', 'data', 'pcr_data_clean_rev.txt')
# Parse the reformated PCR data
qPCRBatch.qPCR <- read.qPCR(pcrData, verbose=T)
# No Ct above 35, no additional filtering required
qPCRBatch.qPCR
sampleNames(qPCRBatch.qPCR)
summary(exprs(qPCRBatch.qPCR))[, c(1:3)]
summary(exprs(qPCRBatch.qPCR))[, c(4:6)]
head(exprs.well.order(qPCRBatch.qPCR))
# Combine the technical replicates (arithmetic mean)
combined <- combineTechReps(qPCRBatch.qPCR)
exprs(combined)
# Add group information
group <- c(rep('negative', 3), rep('positive', 3))
phenoData <- pData(combined)
phenoData$group <- group
pData(combined) <- phenoData
######################################################################
# Normalization tests
######################################################################
# Compare GAPDH normalization with Sangita's results
hk <- 'GAPDH'
exprs(combined)[hk, ]
combined.norm <- deltaCt(qPCRBatch=combined,
hkgs=hk,
calc='arith')
exprs(combined)
exprs(combined.norm)
write.table(exprs(combined.norm), file='qPCR_Norm_GAPDH.txt', sep='\t', quote=F)
# Plot before/after normalization. Not a whole lot of difference
pdf('gapdhNormalization.pdf')
par(mfrow=c(2,2))
qPCR.e <- na.exclude(as.vector(exprs(combined)))
norm.e <- na.exclude(as.vector(exprs(combined.norm)))
xr <- range(c(qPCR.e, norm.e))
hist(qPCR.e, breaks=30, prob=T, col="steelblue",
main="Raw Data",
xlab="Ct value", xlim=xr)
hist(norm.e, breaks=30, prob=T, col="orange",
main="GAPDH-normalized Data",
xlab="Ct value", xlim=xr)
boxplot(exprs(combined),
main='Raw data')
boxplot(exprs(combined.norm),
main='GAPDH-normalized data')
dev.off()
#
# Three housekeeping genes (B2M, Beta_Actin, GAPDH), rank by stability
# Also, no cycles above 35 (or even 30), so no undetermined values to filter
#
group <- as.factor(pData(combined)[, 'group'])
hks <- combined[c('B2M', 'Beta_Actin', 'GAPDH'), ]
res.Negative <- selectHKs(hks[, group=='negative'],
method='geNorm',
Symbols=featureNames(hks),
minNrHK=2,
log=F)
res.Positive <-selectHKs(hks[, group=='positive'],
method='geNorm',
Symbols=featureNames(hks),
minNrHK=2,
log=F)
# Obtain ranks
ranks <- data.frame(c(1, 1:2), res.Negative$ranking, res.Positive$ranking)
names(ranks) <- c('rank', 'negative', 'positive')
ranks
# Compute gene expression stability
# Looks like GAPDH is actually doing worst
referenceCombined <- stabMeasureRho(hks, group=group, log=F)
sort(referenceCombined)
# Automatic selection suggests GAPDH, B2M
selectHKs(hks, group=group, log=F, trace=T,
Symbols=featureNames(hks),
minNrHKs=2,
method='NormFinder')$ranking
# Sangita requested normalization with GAPDH and Beta-Actin alone
# Normalize with combination of HKs, compare
hk <- c('Beta_Actin', 'GAPDH')
combined.norm2 <- deltaCt(qPCRBatch=combined,
hkgs=hk,
calc='artih')
exprs(combined.norm)
exprs(combined.norm2)
# Plot before/after normalization again
par(mfrow=c(2,2))
pdf('2hkNormalization.pdf')
qPCR.e <- na.exclude(as.vector(exprs(combined)))
norm.e <- na.exclude(as.vector(exprs(combined.norm2)))
xr <- range(c(qPCR.e, norm.e))
hist(qPCR.e, breaks=30, prob=T, col="steelblue",
main="Raw Data",
xlab="Ct value", xlim=xr)
hist(norm.e, breaks=30, prob=T, col="orange",
main="3HK-normalized Data",
xlab="Ct value", xlim=xr)
boxplot(exprs(combined),
main='Raw data')
boxplot(exprs(combined.norm2),
main='3HK-normalized data')
dev.off()
# Look at the expression values of just the HKs
exprs(combined)[rownames(exprs(combined)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ]
exprs(combined.norm)[rownames(exprs(combined.norm)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ]
exprs(combined.norm2)[rownames(exprs(combined.norm2)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ]
# What is the SD of the HKs?
apply(exprs(combined.norm)[rownames(exprs(combined.norm)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ],
1, sd)
apply(exprs(combined.norm2)[rownames(exprs(combined.norm2)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ],
1, sd)
# Pairwise sample correlation doesn't change much
par(mfrow=c(1,1))
pdf('pairwiseCorrelationRaw.pdf')
pairs(exprs(combined),
lower.panel=panel.cor
)
dev.off()
pdf('pairwiseCorrelationNormalized_GAPDH.pdf')
pairs(exprs(combined.norm),
lower.panel=panel.cor
)
dev.off()
pdf('pairwiseCorrelationNormalized_2HK.pdf')
pairs(exprs(combined.norm2),
lower.panel=panel.cor
)
dev.off()
######################################################################
# Visualize Ct differences
######################################################################
data <- as.data.frame(exprs(combined.norm))
# No need for HKs
data <- data[!rownames(data) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ]
head(data)
# Sangita asked for a dedicated gene order: G1/S, S, G2/M phase
geneOrder <- c('Cyclin_A2', 'Cyclin_B1', 'Cdk1',
'PCNA',
'Cyclin_D1', 'Cdk4', 'E2F6')
geneOrder <- geneOrder[length(geneOrder):1]
geneOrder <- as.data.frame(geneOrder)
colnames(geneOrder) <- c('Genes')
# Re-sort the normalized expression data
dataSort <- merge(geneOrder, data, by.x=1, by.y=0, sort=F)
rownames(dataSort) <- dataSort$Genes
dataSort <- dataSort[, c(2:7)]
colors <- brewer.pal(9, 'RdYlGn')
pal <- colorRampPalette(colors)(60)
pheatmap(dataSort,
color=pal,
cellwidth=25,
cellheight=12,
scale='none',
cluster_rows=F,
cluster_cols=F,
legend=T,
fontsize_col=12,
filename='allSamplesCtUnscaled_RedGreen_GAPDH.png')
# Again for the 2 HKs
data <- as.data.frame(exprs(combined.norm2))
# No need for HKs
data <- data[!rownames(data) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ]
head(data)
# Re-sort again
dataSort <- merge(geneOrder, data, by.x=1, by.y=0, sort=F)
rownames(dataSort) <- dataSort$Genes
dataSort <- dataSort[, c(2:7)]
# Plot again
pheatmap(dataSort,
color=pal,
cellwidth=25,
cellheight=12,
scale='none',
cluster_rows=F,
treeheight_row=100,
cluster_cols=F,
legend=T,
fontsize_col=12,
filename='allSamplesCtUnscaled_RedGreen_2HK_sorted.png')
######################################################################
# Switch to ddCt to get relative gene expression values
######################################################################
contrast <- cbind(c(1, 1, 1, 0, 0, 0), c(0, 0, 0, 1, 1, 1))
colnames(contrast) <- c('negative', 'positive')
rownames(contrast) <- sampleNames(combined.norm2)
contrast
hk <- c('Beta_Actin', 'GAPDH')
ddCq <- deltaDeltaCt(qPCRBatch=combined.norm2,
maxNACase=0,
maxNAControl=0,
hkg=hk,
contrastM=contrast,
case="positive",
control="negative",
statCalc="arith",
hkgCalc="arith")
changes <- ddCq[, c('ID', '2^-ddCt')]
ddCt <- as.vector(changes$'2^-ddCt')
class(ddCt) <- 'numeric'
fc <- ifelse(ddCt < 1, -1 / ddCt, ddCt)
changes$fc <- fc
changes
# Compare to manually calculated values (Excel table, Experiment 4 analysis)
ddCq[ddCq$ID %in% c('Cdk1', 'Cyclin_A2', 'Cyclin_B1'), c('ID', '2^-ddCt')]
######################################################################
# Plotting the changes; see http://emdbolker.wikidot.com/blog:dynamite
######################################################################
plotData <- exprs(combined.norm)
plotData <- plotData[!rownames(plotData) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ]
# Prepare for ggplot2
library(reshape)
colnames(plotData) <- c(rep('ve-', 3), rep('ve+', 3))
plotData <- melt(plotData)
colnames(plotData) <- c('Gene', 'Condition', 'value')
# Re-order x-axis
plotData$Gene <- factor(plotData$Gene,
levels=c('Cyclin_A2', 'Cyclin_B1', 'Cdk1',
'PCNA',
'Cyclin_D1', 'Cdk4', 'E2F6'))
theme_set(theme_grey())
# Basic plot data and labels
g0 <- ggplot(plotData, aes(x=Gene, y=value)) +
scale_y_continuous(name='Ct') +
opts(title='Ct values of ve-/ve+ relative to GAPDH',
axis.text.x=theme_text(angle=90, hjust=1, size=14),
axis.text.y=theme_text(size=14),
axis.title.x=theme_blank(),
axis.title.y=theme_text(angle=90, size=14, vjust=0.4),
legend.title=theme_text(size=14),
legend.text=theme_text(size=12)
)
# Standard dynamite plots
g_dyn <- g0 +
aes(fill=Condition) +
stat_summary(fun.data=mean_cl_normal,
geom='bar',
position='dodge') +
stat_summary(fun.data=mean_cl_normal,
geom='errorbar',
width=0.25,
position=position_dodge(width=0.90))
g_dyn
ggsave('dynamite.png')
# Just the error bar to remove some of the chart junk
g_errbar <- g0 +
aes(colour=Condition) +
stat_summary(fun.data=mean_cl_normal,
geom='pointrange',
size=1,
position=position_dodge(width=0.6))
g_errbar
ggsave('errorbar.png')
# Much prefer a boxplot along with the dots
g_boxplot <- g0 +
geom_boxplot(aes(fill=Condition)) +
scale_fill_brewer(palette = "Set1") +
geom_point(aes(colour=Condition),
size=3,
position=position_dodge(width=0.75))
g_boxplot
ggsave('boxplot.png')
# Try with facets
g1 <- ggplot(plotData, aes(x=Condition, y=value)) +
scale_y_continuous(name='Ct') +
opts(title='Ct values of ve-/ve+ relative to GAPDH',
axis.text.x=theme_blank()),
axis.text.y=theme_text(size=16, vjust=0.2),
axis.title.x=theme_blank(),
axis.title.y=theme_text(angle=90, size=16, vjust=0.2),
legend.title=theme_text(size=14),
legend.text=theme_text(size=12)
)
g_boxplot_f <- g1 +
geom_boxplot(aes(fill=Condition)) +
scale_fill_brewer(palette = "Set1") +
geom_point(aes(colour=Condition),
size=3,
position=position_dodge(width=0.75)) +
facet_grid(~Gene, as.table=F) +
opts(axis.text.x=theme_blank())
g_boxplot_f
ggsave('revised_boxplot.pdf')
# Or even _just_ the pointrange
g_range <- g0 +
aes(colour=Condition) +
scale_colour_brewer(palette="Set1") +
stat_summary(fun.ymin=min,
fun.ymax=max,
size=1.5,
geom='linerange',
position=position_dodge(width=0.5)) +
geom_point(aes(colour=Condition),
size=4, alpha=0.5,
position=position_dodge(width=0.5))
g_range
ggsave('linerange.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment