Skip to content

Instantly share code, notes, and snippets.

@jwbargsten
Created September 3, 2012 21:09
Show Gist options
  • Save jwbargsten/3613577 to your computer and use it in GitHub Desktop.
Save jwbargsten/3613577 to your computer and use it in GitHub Desktop.
correlation analysis - light intensity experiment
# Copyright (c) 2012 Joachim Bargsten
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without
# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
# conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial
# portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
# EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
library(colorRamps)
library(grid)
library(reshape2)
library(reshape)
library(scales)
library(ggplot2)
library(plyr)
library(gdata)
library(Hmisc)
comma_format2 <- function (...) { function(x) comma(round(x,2), ...) }
data.raw <- read.csv("correlation_hanzi_2012-08-20.csv")
correlation_heatmap_file <- "correlation_matrix.fdr.png"
correlation_boxplot_file <- "light-intensity_vs_all.png"
correlation_boxplot_jitter_file <- "light-intensity_vs_all-jitter.png"
correlation_data_file <- "correlation.results.csv"
data <- data.raw[, names(data.raw) != "repetition"]
## omit na introduced by excel
data <- data[!is.na(data[,"genotypes"]),]
## show NAs
colwise(function(x) { sum(is.na(x)) })(data)
## replace NA by mean per genotype
data.nona <- ddply(
data,
.(genotypes),
colwise(function(x) { if(is.numeric(x)) x[is.na(x)] <- mean(na.omit(x)); x })
)
## check again for NAs
colwise(function(x) { sum(is.na(x)) })(data.nona)
## normalise data
data.scaled <- colwise(function(x) { if(is.numeric(x)) return(scale(x)); x })(data.nona)
## rearrange rows for correlation analysis
## (makes it easier to extract the correlation and p-values as submatrix)
first_rows <- c("genotypes", "light_intensity")
data.scaled <- cbind(
data.scaled[,names(data.scaled) %in% first_rows],
data.scaled[,!names(data.scaled) %in% first_rows]
)
## only do the correlation on numeric columns
data.numcols <- unlist(colwise(is.numeric)(data.scaled))
## do the correlation by genotype
ds.cor <- ddply(
data.scaled,
.(genotypes),
function(ds,numcols) {
## what is the genotype of the subset
genotype <- ds[1,"genotypes"]
## calculate the correlation of all numeric columns
ds.corprob <- rcorr(as.matrix(ds[,numcols]), type="pearson")
ds.corprob$P[!is.na(ds.corprob$P)] <- p.adjust(ds.corprob$P[!is.na(ds.corprob$P)], method="fdr")
## separate light intensity from the rest
ds.cor <- ds.corprob$r[-1,1]
ds.prob <- t(ds.corprob$P[1,-1])[1,]
ds.cor.long <- data.frame(
test=names(ds.cor),
correlation=ds.cor,
p.value=ds.prob,
genotypes=rep(genotype, length(ds.cor)),
covariates=rep("light_intensity", length(ds.cor))
)
ds.cor.long
},
data.numcols
)
## create the heatmap plot
p <- ggplot(ds.cor, aes(test, genotypes)) +
geom_tile(data=subset(ds.cor, !is.na(correlation) & p.value < 0.05), aes(fill=correlation),colour="white") +
scale_fill_gradient2(low="steelblue", high="darkred",breaks=seq(-0.7,0.7,0.1), labels=comma_format2()) +
geom_tile(data=subset(ds.cor, is.na(correlation) | p.value > 0.05 ), aes(colour=NA, fill=correlation), linetype=1, fill="#dfdfdf", colour="white")
p <- p + opts(
axis.ticks = theme_blank(),
axis.text.x = theme_text(angle=90,vjust=0,hjust=1),
axis.text.y = theme_text(face="bold", size=8, hjust=1)
)
p <- p + scale_y_discrete( expand=c(0,0)) + scale_x_discrete(expand=c(0,0))
## plot the heatmap
png(correlation_heatmap_file,res=300, width=19, height=17, units="cm")
p + facet_grid(covariates ~ .)
dev.off()
## create a boxplot of all vs light intenisty
png(correlation_boxplot_file,res=300, width=30, height=20, units="cm")
data.box <- data.nona
data.box[,"light_intensity"] <- as.factor(as.character(data.box[,"light_intensity"]))
data.box.long <- melt(data.box)
p <- ggplot(data.box.long, aes(light_intensity, value)) +
geom_boxplot() +
facet_wrap(~ variable, scales="free")
p
dev.off()
## same as above, only with data points included
png(correlation_boxplot_jitter_file,res=300, width=30, height=20, units="cm")
p + geom_jitter(aes(light_intensity, value, colour=genotypes))
dev.off()
write.csv(ds.cor, correlation_data_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment