Last active
April 24, 2018 15:16
-
-
Save johnlees/64013095026ccd77ff751fc2985b6fe2 to your computer and use it in GitHub Desktop.
Gene expression modules for S. pneumoniae from PneumoExpress co-expression matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(WGCNA) | |
# See: | |
# https://labs.genetics.ucla.edu/horvath/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/FemaleLiver-02-networkConstr-man.pdf | |
SPV_2_name = read.delim("SPV_2_name.txt", header=FALSE) | |
colnames(SPV_2_name) = c("SPV", "gene") | |
# co-expression matrix from Veening lab | |
# https://www.biorxiv.org/content/early/2018/03/22/283739 | |
# data: https://zenodo.org/record/1157923 | |
# once | |
`18_matrix_long_5` <- read.csv("18_matrix_long_5.csv", stringsAsFactors=FALSE) | |
`18_matrix_long_4` <- read.csv("18_matrix_long_4.csv", stringsAsFactors=FALSE) | |
`18_matrix_long_3` <- read.csv("18_matrix_long_3.csv", stringsAsFactors=FALSE) | |
`18_matrix_long_2` <- read.csv("18_matrix_long_2.csv", stringsAsFactors=FALSE) | |
`18_matrix_long_1` <- read.csv("18_matrix_long_1.csv", stringsAsFactors=FALSE) | |
all = rbind(`18_matrix_long_1`, `18_matrix_long_2`, `18_matrix_long_3`, `18_matrix_long_4`, `18_matrix_long_5`) | |
all_M = matrix(nrow = 2152, ncol = 2152, dimnames=list(origin=unique(all$gene1), dev=unique(all$gene2))) | |
all_M[cbind(all$gene1, all$gene2)] = all$correlation | |
saveRDS(all_M, "coexpression_matrix.Rds") | |
# subsequently | |
all_M = readRDS("coexpression_matrix.Rds") | |
# Run WGCNA as in tutorial | |
powers = c(c(1:10), seq(from = 12, to=20, by=2)) | |
sft = pickSoftThreshold.fromSimilarity(all_M, powerVector = powers, verbose = 5) | |
sizeGrWindow(9, 5) | |
par(mfrow = c(1,2)) | |
cex1 = 0.9 | |
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], | |
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", | |
main = paste("Scale independence")) | |
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], | |
labels=powers,cex=cex1,col="red") | |
# this line corresponds to using an R^2 cut-off of h | |
abline(h=0.90,col="red") | |
# Mean connectivity as a function of the soft-thresholding power | |
plot(sft$fitIndices[,1], sft$fitIndices[,5], | |
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", | |
main = paste("Mean connectivity")) | |
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red") | |
softPower = 5 | |
adjacency = adjacency.fromSimilarity(all_M, power = softPower) | |
# Turn adjacency into topological overlap | |
TOM = TOMsimilarity(adjacency); | |
dissTOM = 1-TOM | |
# Call the hierarchical clustering function | |
geneTree = hclust(as.dist(dissTOM), method = "average") | |
# Plot the resulting clustering tree (dendrogram) | |
sizeGrWindow(12,9) | |
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity", | |
labels = FALSE, hang = 0.04) | |
# We like large modules, so we set the minimum module size relatively high: | |
minModuleSize = 10; | |
# Module identification using dynamic tree cut: | |
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, | |
deepSplit = 2, pamRespectsDendro = FALSE, | |
minClusterSize = minModuleSize); | |
table(dynamicMods) | |
# Convert numeric lables into colors | |
dynamicColors = labels2colors(dynamicMods) | |
table(dynamicColors) | |
# Plot the dendrogram and colors underneath | |
sizeGrWindow(8,6) | |
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut", | |
dendroLabels = FALSE, hang = 0.03, | |
addGuide = TRUE, guideHang = 0.05, | |
main = "Gene dendrogram and module colors") | |
colorOrder = c("grey", standardColors(50)) | |
moduleLabels = match(dynamicColors, colorOrder)-1 | |
modules = data.frame(SPV=rownames(all_M), module=moduleLabels) | |
labelled_modules = merge(modules, SPV_2_name, by.x = "SPV", by.y = "SPV", all.x = TRUE) | |
write.table(labelled_modules, "WGCNA_modules.tsv", quote = F, sep = "\t", row.names = F, col.names = F) | |
#### ! | |
# Can't do this with similarity | |
#### ! | |
# Not run | |
# Calculate eigengenes | |
MEList = moduleEigengenes(all_M, colors = dynamicColors) | |
MEs = MEList$eigengenes | |
# Calculate dissimilarity of module eigengenes | |
MEDiss = 1-cor(MEs); | |
# Cluster module eigengenes | |
METree = hclust(as.dist(MEDiss), method = "average"); | |
# Plot the result | |
sizeGrWindow(7, 6) | |
plot(METree, main = "Clustering of module eigengenes", | |
xlab = "", sub = "") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment