Created
April 21, 2012 01:00
-
-
Save ohofmann/2433014 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ReadqPCR) | |
library(NormqPCR) | |
library(pheatmap) | |
library(RColorBrewer) | |
library(ggplot2) | |
###################################################################### | |
# Setup | |
###################################################################### | |
# Pairwise correlations | |
panel.cor <- function(x, y, digits=2, prefix="", cex.cor) | |
{ | |
usr <- par("usr"); on.exit(par(usr)) | |
par(usr = c(0, 1, 0, 1)) | |
r = (cor(x, y, use="pairwise")) | |
txt <- format(c(r, 0.123456789), digits=digits)[1] | |
txt <- paste(prefix, txt, sep="") | |
if(missing(cex.cor)) cex <- 0.6/strwidth(txt) | |
text(0.5, 0.5, txt, cex=cex ) | |
} | |
###################################################################### | |
# Parse data | |
###################################################################### | |
# | |
# We have technical and biological replicates, well information | |
# is available. Had to drop NCT reads as they did not have the | |
# same number of technical replicates. Odd limitation. | |
pcrData <- file.path('.', 'data', 'pcr_data_clean_rev.txt') | |
# Parse the reformated PCR data | |
qPCRBatch.qPCR <- read.qPCR(pcrData, verbose=T) | |
# No Ct above 35, no additional filtering required | |
qPCRBatch.qPCR | |
sampleNames(qPCRBatch.qPCR) | |
summary(exprs(qPCRBatch.qPCR))[, c(1:3)] | |
summary(exprs(qPCRBatch.qPCR))[, c(4:6)] | |
head(exprs.well.order(qPCRBatch.qPCR)) | |
# Combine the technical replicates (arithmetic mean) | |
combined <- combineTechReps(qPCRBatch.qPCR) | |
exprs(combined) | |
# Add group information | |
group <- c(rep('negative', 3), rep('positive', 3)) | |
phenoData <- pData(combined) | |
phenoData$group <- group | |
pData(combined) <- phenoData | |
###################################################################### | |
# Normalization tests | |
###################################################################### | |
# Compare GAPDH normalization with Sangita's results | |
hk <- 'GAPDH' | |
exprs(combined)[hk, ] | |
combined.norm <- deltaCt(qPCRBatch=combined, | |
hkgs=hk, | |
calc='arith') | |
exprs(combined) | |
exprs(combined.norm) | |
write.table(exprs(combined.norm), file='qPCR_Norm_GAPDH.txt', sep='\t', quote=F) | |
# Plot before/after normalization. Not a whole lot of difference | |
pdf('gapdhNormalization.pdf') | |
par(mfrow=c(2,2)) | |
qPCR.e <- na.exclude(as.vector(exprs(combined))) | |
norm.e <- na.exclude(as.vector(exprs(combined.norm))) | |
xr <- range(c(qPCR.e, norm.e)) | |
hist(qPCR.e, breaks=30, prob=T, col="steelblue", | |
main="Raw Data", | |
xlab="Ct value", xlim=xr) | |
hist(norm.e, breaks=30, prob=T, col="orange", | |
main="GAPDH-normalized Data", | |
xlab="Ct value", xlim=xr) | |
boxplot(exprs(combined), | |
main='Raw data') | |
boxplot(exprs(combined.norm), | |
main='GAPDH-normalized data') | |
dev.off() | |
# | |
# Three housekeeping genes (B2M, Beta_Actin, GAPDH), rank by stability | |
# Also, no cycles above 35 (or even 30), so no undetermined values to filter | |
# | |
group <- as.factor(pData(combined)[, 'group']) | |
hks <- combined[c('B2M', 'Beta_Actin', 'GAPDH'), ] | |
res.Negative <- selectHKs(hks[, group=='negative'], | |
method='geNorm', | |
Symbols=featureNames(hks), | |
minNrHK=2, | |
log=F) | |
res.Positive <-selectHKs(hks[, group=='positive'], | |
method='geNorm', | |
Symbols=featureNames(hks), | |
minNrHK=2, | |
log=F) | |
# Obtain ranks | |
ranks <- data.frame(c(1, 1:2), res.Negative$ranking, res.Positive$ranking) | |
names(ranks) <- c('rank', 'negative', 'positive') | |
ranks | |
# Compute gene expression stability | |
# Looks like GAPDH is actually doing worst | |
referenceCombined <- stabMeasureRho(hks, group=group, log=F) | |
sort(referenceCombined) | |
# Automatic selection suggests GAPDH, B2M | |
selectHKs(hks, group=group, log=F, trace=T, | |
Symbols=featureNames(hks), | |
minNrHKs=2, | |
method='NormFinder')$ranking | |
# Sangita requested normalization with GAPDH and Beta-Actin alone | |
# Normalize with combination of HKs, compare | |
hk <- c('Beta_Actin', 'GAPDH') | |
combined.norm2 <- deltaCt(qPCRBatch=combined, | |
hkgs=hk, | |
calc='artih') | |
exprs(combined.norm) | |
exprs(combined.norm2) | |
# Plot before/after normalization again | |
par(mfrow=c(2,2)) | |
pdf('2hkNormalization.pdf') | |
qPCR.e <- na.exclude(as.vector(exprs(combined))) | |
norm.e <- na.exclude(as.vector(exprs(combined.norm2))) | |
xr <- range(c(qPCR.e, norm.e)) | |
hist(qPCR.e, breaks=30, prob=T, col="steelblue", | |
main="Raw Data", | |
xlab="Ct value", xlim=xr) | |
hist(norm.e, breaks=30, prob=T, col="orange", | |
main="3HK-normalized Data", | |
xlab="Ct value", xlim=xr) | |
boxplot(exprs(combined), | |
main='Raw data') | |
boxplot(exprs(combined.norm2), | |
main='3HK-normalized data') | |
dev.off() | |
# Look at the expression values of just the HKs | |
exprs(combined)[rownames(exprs(combined)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ] | |
exprs(combined.norm)[rownames(exprs(combined.norm)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ] | |
exprs(combined.norm2)[rownames(exprs(combined.norm2)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ] | |
# What is the SD of the HKs? | |
apply(exprs(combined.norm)[rownames(exprs(combined.norm)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ], | |
1, sd) | |
apply(exprs(combined.norm2)[rownames(exprs(combined.norm2)) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ], | |
1, sd) | |
# Pairwise sample correlation doesn't change much | |
par(mfrow=c(1,1)) | |
pdf('pairwiseCorrelationRaw.pdf') | |
pairs(exprs(combined), | |
lower.panel=panel.cor | |
) | |
dev.off() | |
pdf('pairwiseCorrelationNormalized_GAPDH.pdf') | |
pairs(exprs(combined.norm), | |
lower.panel=panel.cor | |
) | |
dev.off() | |
pdf('pairwiseCorrelationNormalized_2HK.pdf') | |
pairs(exprs(combined.norm2), | |
lower.panel=panel.cor | |
) | |
dev.off() | |
###################################################################### | |
# Visualize Ct differences | |
###################################################################### | |
data <- as.data.frame(exprs(combined.norm)) | |
# No need for HKs | |
data <- data[!rownames(data) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ] | |
head(data) | |
# Sangita asked for a dedicated gene order: G1/S, S, G2/M phase | |
geneOrder <- c('Cyclin_A2', 'Cyclin_B1', 'Cdk1', | |
'PCNA', | |
'Cyclin_D1', 'Cdk4', 'E2F6') | |
geneOrder <- geneOrder[length(geneOrder):1] | |
geneOrder <- as.data.frame(geneOrder) | |
colnames(geneOrder) <- c('Genes') | |
# Re-sort the normalized expression data | |
dataSort <- merge(geneOrder, data, by.x=1, by.y=0, sort=F) | |
rownames(dataSort) <- dataSort$Genes | |
dataSort <- dataSort[, c(2:7)] | |
colors <- brewer.pal(9, 'RdYlGn') | |
pal <- colorRampPalette(colors)(60) | |
pheatmap(dataSort, | |
color=pal, | |
cellwidth=25, | |
cellheight=12, | |
scale='none', | |
cluster_rows=F, | |
cluster_cols=F, | |
legend=T, | |
fontsize_col=12, | |
filename='allSamplesCtUnscaled_RedGreen_GAPDH.png') | |
# Again for the 2 HKs | |
data <- as.data.frame(exprs(combined.norm2)) | |
# No need for HKs | |
data <- data[!rownames(data) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ] | |
head(data) | |
# Re-sort again | |
dataSort <- merge(geneOrder, data, by.x=1, by.y=0, sort=F) | |
rownames(dataSort) <- dataSort$Genes | |
dataSort <- dataSort[, c(2:7)] | |
# Plot again | |
pheatmap(dataSort, | |
color=pal, | |
cellwidth=25, | |
cellheight=12, | |
scale='none', | |
cluster_rows=F, | |
treeheight_row=100, | |
cluster_cols=F, | |
legend=T, | |
fontsize_col=12, | |
filename='allSamplesCtUnscaled_RedGreen_2HK_sorted.png') | |
###################################################################### | |
# Switch to ddCt to get relative gene expression values | |
###################################################################### | |
contrast <- cbind(c(1, 1, 1, 0, 0, 0), c(0, 0, 0, 1, 1, 1)) | |
colnames(contrast) <- c('negative', 'positive') | |
rownames(contrast) <- sampleNames(combined.norm2) | |
contrast | |
hk <- c('Beta_Actin', 'GAPDH') | |
ddCq <- deltaDeltaCt(qPCRBatch=combined.norm2, | |
maxNACase=0, | |
maxNAControl=0, | |
hkg=hk, | |
contrastM=contrast, | |
case="positive", | |
control="negative", | |
statCalc="arith", | |
hkgCalc="arith") | |
changes <- ddCq[, c('ID', '2^-ddCt')] | |
ddCt <- as.vector(changes$'2^-ddCt') | |
class(ddCt) <- 'numeric' | |
fc <- ifelse(ddCt < 1, -1 / ddCt, ddCt) | |
changes$fc <- fc | |
changes | |
# Compare to manually calculated values (Excel table, Experiment 4 analysis) | |
ddCq[ddCq$ID %in% c('Cdk1', 'Cyclin_A2', 'Cyclin_B1'), c('ID', '2^-ddCt')] | |
###################################################################### | |
# Plotting the changes; see http://emdbolker.wikidot.com/blog:dynamite | |
###################################################################### | |
plotData <- exprs(combined.norm) | |
plotData <- plotData[!rownames(plotData) %in% c('B2M', 'Beta_Actin', 'GAPDH'), ] | |
# Prepare for ggplot2 | |
library(reshape) | |
colnames(plotData) <- c(rep('ve-', 3), rep('ve+', 3)) | |
plotData <- melt(plotData) | |
colnames(plotData) <- c('Gene', 'Condition', 'value') | |
# Re-order x-axis | |
plotData$Gene <- factor(plotData$Gene, | |
levels=c('Cyclin_A2', 'Cyclin_B1', 'Cdk1', | |
'PCNA', | |
'Cyclin_D1', 'Cdk4', 'E2F6')) | |
theme_set(theme_grey()) | |
# Basic plot data and labels | |
g0 <- ggplot(plotData, aes(x=Gene, y=value)) + | |
scale_y_continuous(name='Ct') + | |
opts(title='Ct values of ve-/ve+ relative to GAPDH', | |
axis.text.x=theme_text(angle=90, hjust=1, size=14), | |
axis.text.y=theme_text(size=14), | |
axis.title.x=theme_blank(), | |
axis.title.y=theme_text(angle=90, size=14, vjust=0.4), | |
legend.title=theme_text(size=14), | |
legend.text=theme_text(size=12) | |
) | |
# Standard dynamite plots | |
g_dyn <- g0 + | |
aes(fill=Condition) + | |
stat_summary(fun.data=mean_cl_normal, | |
geom='bar', | |
position='dodge') + | |
stat_summary(fun.data=mean_cl_normal, | |
geom='errorbar', | |
width=0.25, | |
position=position_dodge(width=0.90)) | |
g_dyn | |
ggsave('dynamite.png') | |
# Just the error bar to remove some of the chart junk | |
g_errbar <- g0 + | |
aes(colour=Condition) + | |
stat_summary(fun.data=mean_cl_normal, | |
geom='pointrange', | |
size=1, | |
position=position_dodge(width=0.6)) | |
g_errbar | |
ggsave('errorbar.png') | |
# Much prefer a boxplot along with the dots | |
g_boxplot <- g0 + | |
geom_boxplot(aes(fill=Condition)) + | |
scale_fill_brewer(palette = "Set1") + | |
geom_point(aes(colour=Condition), | |
size=3, | |
position=position_dodge(width=0.75)) | |
g_boxplot | |
ggsave('boxplot.png') | |
# Try with facets | |
g1 <- ggplot(plotData, aes(x=Condition, y=value)) + | |
scale_y_continuous(name='Ct') + | |
opts(title='Ct values of ve-/ve+ relative to GAPDH', | |
axis.text.x=theme_blank()), | |
axis.text.y=theme_text(size=16, vjust=0.2), | |
axis.title.x=theme_blank(), | |
axis.title.y=theme_text(angle=90, size=16, vjust=0.2), | |
legend.title=theme_text(size=14), | |
legend.text=theme_text(size=12) | |
) | |
g_boxplot_f <- g1 + | |
geom_boxplot(aes(fill=Condition)) + | |
scale_fill_brewer(palette = "Set1") + | |
geom_point(aes(colour=Condition), | |
size=3, | |
position=position_dodge(width=0.75)) + | |
facet_grid(~Gene, as.table=F) + | |
opts(axis.text.x=theme_blank()) | |
g_boxplot_f | |
ggsave('revised_boxplot.pdf') | |
# Or even _just_ the pointrange | |
g_range <- g0 + | |
aes(colour=Condition) + | |
scale_colour_brewer(palette="Set1") + | |
stat_summary(fun.ymin=min, | |
fun.ymax=max, | |
size=1.5, | |
geom='linerange', | |
position=position_dodge(width=0.5)) + | |
geom_point(aes(colour=Condition), | |
size=4, alpha=0.5, | |
position=position_dodge(width=0.5)) | |
g_range | |
ggsave('linerange.png') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment