Skip to content

Instantly share code, notes, and snippets.

@aseetharam
Last active August 29, 2015 14:28
Show Gist options
  • Save aseetharam/975ab89052f1a8964429 to your computer and use it in GitHub Desktop.
Save aseetharam/975ab89052f1a8964429 to your computer and use it in GitHub Desktop.
Clustering metabolomics data for different tissues from various locations
library(Heatplus)
library(vegan)
library(RColorBrewer)
library("gplots")
# Read the bloodroot metabolite table. quote = "" switches off quote
# processing, so quote characters in the file are kept as ordinary text.
all.data <- read.csv(
  file = "C:/Users/Arun Seetharam/OneDrive/PostDoc/Projects/20150303_Perera_metabolomics/bloodroot_data_v2d.csv",
  quote = ""
)

# Label each row with its ID, then drop the first column so that only the
# measurement columns remain (assumes ID is the first column -- TODO confirm).
rownames(all.data) <- all.data$ID
all.data <- all.data[, -1]

# Express every value as a proportion of its row total.
data.prop <- all.data / rowSums(all.data)
# 100-step colour ramp from light yellow to red, shared by all heatmaps below.
scaleyellowred <- colorRampPalette(
  c("lightyellow", "red"),
  space = "rgb"
)(100)

# First look at the proportions: no row or column reordering / dendrograms.
heatmap(
  as.matrix(data.prop),
  Rowv = NA,
  Colv = NA,
  col = scaleyellowred
)

# Largest proportion reached in each column; only inspected, not used to
# filter anything.
maxab <- vapply(data.prop, max, numeric(1))
head(maxab)

# Working copy used by every later plot (currently an unfiltered copy).
data.prop.1 <- data.prop

# Same unclustered heatmap, with a larger bottom margin for the column labels.
heatmap(
  as.matrix(data.prop.1),
  Rowv = NA,
  Colv = NA,
  col = scaleyellowred,
  margins = c(10, 2)
)
# Cluster the rows: Bray-Curtis dissimilarity between rows of the proportion
# table, then hierarchical clustering ("aver" partially matches "average").
data.dist <- vegdist(data.prop, method = "bray")
row.clus <- hclust(data.dist, method = "aver")

# Heatmap with rows ordered by the row dendrogram; columns left as they are.
heatmap(
  as.matrix(data.prop.1),
  Rowv = as.dendrogram(row.clus),
  Colv = NA,
  col = scaleyellowred,
  margins = c(10, 3)
)

# Cluster the columns the same way, working on the transposed table.
data.dist.g <- vegdist(t(data.prop.1), method = "bray")
col.clus <- hclust(data.dist.g, method = "aver")

# Heatmap with both rows and columns ordered by their dendrograms.
heatmap(
  as.matrix(data.prop.1),
  Rowv = as.dendrogram(row.clus),
  Colv = as.dendrogram(col.clus),
  col = scaleyellowred,
  margins = c(10, 3)
)
# Row-side colours for tissue based labelling.
# Each of the 63 rows carries a group code 1-4 (presumably the tissue type --
# confirm against the sample sheet). Codes come in runs of three rows:
# four cycles of 1,2,3,4, then 2,3,4, then 3,4. The code is used as an index
# into the palette below.
var1 <- c(
  "deepskyblue", # code 1
  "magenta",     # code 2
  "orange",      # code 3
  "green"        # code 4
)[rep(c(rep(1:4, times = 4), 2:4, 3:4), each = 3)]
# Show each sample name next to its assigned colour so the alignment of the
# tissue labelling can be checked by eye.
cbind(row.names(data.prop), var1)

# Clustered heatmap with the tissue colour strip down the side. It is drawn
# twice; the two renders differ only in the row-label margin (7, then 8).
invisible(lapply(c(7, 8), function(row.margin) {
  heatmap.2(
    as.matrix(data.prop.1),
    Rowv = as.dendrogram(row.clus),
    Colv = as.dendrogram(col.clus),
    col = scaleyellowred,
    RowSideColors = var1,
    margins = c(12, row.margin),
    trace = "none",
    density.info = "none",
    xlab = "Metabolites",
    ylab = "samples",
    main = "Bloodroot Heatmap",
    lhei = c(2, 8)
  )
}))
# Row-side colours for location based labelling.
# The 63 rows are grouped into six consecutive blocks with codes 1-6
# (presumably one code per collection location -- confirm against the sample
# sheet). Block sizes are 12, 12, 12, 12, 9 and 6 rows; the code is used as
# an index into the palette below.
var1 <- c(
  "deepskyblue", # code 1
  "magenta",     # code 2
  "orange",      # code 3
  "green",       # code 4
  "brown",       # code 5
  "grey"         # code 6
)[rep(1:6, times = c(12, 12, 12, 12, 9, 6))]
# Show each sample name next to its assigned colour so the alignment of the
# location labelling can be checked by eye.
cbind(row.names(data.prop), var1)

# Clustered heatmap with the location colour strip down the side. It is drawn
# twice; the two renders differ only in the row-label margin (7, then 8).
invisible(lapply(c(7, 8), function(row.margin) {
  heatmap.2(
    as.matrix(data.prop.1),
    Rowv = as.dendrogram(row.clus),
    Colv = as.dendrogram(col.clus),
    col = scaleyellowred,
    RowSideColors = var1,
    margins = c(12, row.margin),
    trace = "none",
    density.info = "none",
    xlab = "Metabolites",
    ylab = "samples",
    main = "Bloodroot Heatmap",
    lhei = c(2, 8)
  )
}))
# Load the table used for the PCA (the "v2e" export of the data).
all.data <- read.csv(
  file = "C:/Users/Arun Seetharam/OneDrive/PostDoc/Projects/20150303_Perera_metabolomics/bloodroot_data_v2e.csv",
  quote = ""
)

# NOTE(review): this assignment masks R's built-in `iris` dataset in the
# global environment; the name is kept so existing references keep working.
iris <- all.data

# Grouping variables: column 2 and column 3 (presumably location and tissue,
# going by the variable names -- confirm against the file header).
ir.location <- iris[[2]]
ir.tissue <- iris[[3]]

# Measurement columns 4 to 21 are the PCA input.
# If the data contain no zeros / missing values, a log transform gives better
# resolution for small numbers; in that case use instead:
# ir <- log(iris[, 4:21])
ir <- iris[4:21]

# PCA on centred variables scaled to unit variance.
ir.pca <- prcomp(ir, center = TRUE, scale. = TRUE)

# Inspect the fit: standard deviations and loadings, a line (scree) plot of
# the component variances, and the proportion of variance explained.
print(ir.pca)
plot(ir.pca, type = "l")
summary(ir.pca)

# Project the last two rows of the input onto the principal components.
predict(ir.pca, newdata = tail(ir, n = 2))
library(devtools)
library(ggbiplot)
# PCA biplot coloured by location, with one ellipse per group and the
# correlation circle. Legend: untitled, horizontal, above the plot.
# To drop the variable arrows and the circle, set circle = FALSE and
# var.axes = FALSE.
g <- ggbiplot(
  ir.pca,
  obs.scale = 1,
  var.scale = 1,
  groups = ir.location,
  ellipse = TRUE,
  circle = TRUE
) +
  scale_color_discrete(name = '') +
  theme(legend.direction = 'horizontal', legend.position = 'top')
print(g)

# Same biplot, coloured by tissue instead of location.
# To drop the variable arrows and the circle, set circle = FALSE and
# var.axes = FALSE.
g <- ggbiplot(
  ir.pca,
  obs.scale = 1,
  var.scale = 1,
  groups = ir.tissue,
  ellipse = TRUE,
  circle = TRUE
) +
  scale_color_discrete(name = '') +
  theme(legend.direction = 'horizontal', legend.position = 'top')
print(g)
# Save the command history of this session to a file.
# savehistory() needs an interactive console with a history mechanism; when
# the script is run non-interactively (e.g. via Rscript) it raises an error,
# which would abort the script on its very last line. Skip it in that case.
if (interactive()) {
  savehistory("C:/Users/Arun Seetharam/OneDrive/PostDoc/Projects/20150303_Perera_metabolomics/R_cmds_PCA.R")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment