johnlees · April 24, 2018 15:16
diff --git a/pneumo_wgcna.R b/pneumo_wgcna.R
 require(WGCNA)

 # See:
 # https://labs.genetics.ucla.edu/horvath/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/FemaleLiver-02-networkConstr-man.pdf

 SPV_2_name = read.delim("SPV_2_name.txt", header=FALSE)
 colnames(SPV_2_name) = c("SPV", "gene")

 # co-expression matrix from Veening lab
 # https://www.biorxiv.org/content/early/2018/03/22/283739
 # data: https://zenodo.org/record/1157923

 # once
 `18_matrix_long_5` <- read.csv("18_matrix_long_5.csv", stringsAsFactors=FALSE)
 `18_matrix_long_4` <- read.csv("18_matrix_long_4.csv", stringsAsFactors=FALSE)
 `18_matrix_long_3` <- read.csv("18_matrix_long_3.csv", stringsAsFactors=FALSE)
 `18_matrix_long_2` <- read.csv("18_matrix_long_2.csv", stringsAsFactors=FALSE)
 `18_matrix_long_1` <- read.csv("18_matrix_long_1.csv", stringsAsFactors=FALSE)
 all = rbind(`18_matrix_long_1`, `18_matrix_long_2`, `18_matrix_long_3`, `18_matrix_long_4`, `18_matrix_long_5`)
 all_M = matrix(nrow = 2152, ncol = 2152, dimnames=list(origin=unique(all$gene1), dev=unique(all$gene2)))
 all_M[cbind(all$gene1, all$gene2)] = all$correlation
 saveRDS(all_M, "coexpression_matrix.Rds")
 # subsequently
 all_M = readRDS("coexpression_matrix.Rds")

 # Run WGCNA as in tutorial
 powers = c(c(1:10), seq(from = 12, to=20, by=2))
 sft = pickSoftThreshold.fromSimilarity(all_M, powerVector = powers, verbose = 5)

 sizeGrWindow(9, 5)
 par(mfrow = c(1,2))
 cex1 = 0.9

 plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"))
 text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red")
 # this line corresponds to using an R^2 cut-off of h
 abline(h=0.90,col="red")
 # Mean connectivity as a function of the soft-thresholding power
 plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
 text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

 softPower = 5
 adjacency = adjacency.fromSimilarity(all_M, power = softPower)

 # Turn adjacency into topological overlap
 TOM = TOMsimilarity(adjacency);
 dissTOM = 1-TOM

 # Call the hierarchical clustering function
 geneTree = hclust(as.dist(dissTOM), method = "average")
 # Plot the resulting clustering tree (dendrogram)
 sizeGrWindow(12,9)
 plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
     labels = FALSE, hang = 0.04)

 # We like large modules, so we set the minimum module size relatively high:
 minModuleSize = 10;
 # Module identification using dynamic tree cut:
 dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
                            deepSplit = 2, pamRespectsDendro = FALSE,
                            minClusterSize = minModuleSize);
 table(dynamicMods)

 # Convert numeric lables into colors
 dynamicColors = labels2colors(dynamicMods)
 table(dynamicColors)
 # Plot the dendrogram and colors underneath
 sizeGrWindow(8,6)
 plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = "Gene dendrogram and module colors")

 colorOrder = c("grey", standardColors(50))
 moduleLabels = match(dynamicColors, colorOrder)-1
 modules = data.frame(SPV=rownames(all_M), module=moduleLabels)

 labelled_modules = merge(modules, SPV_2_name, by.x = "SPV", by.y = "SPV", all.x = TRUE)
 write.table(labelled_modules, "WGCNA_modules.tsv", quote = F, sep = "\t", row.names = F, col.names = F)

 #### !
 # Can't do this with similarity
 #### !
 # Not run

 # Calculate eigengenes
 MEList = moduleEigengenes(all_M, colors = dynamicColors)
 MEs = MEList$eigengenes
 # Calculate dissimilarity of module eigengenes
 MEDiss = 1-cor(MEs);
 # Cluster module eigengenes
 METree = hclust(as.dist(MEDiss), method = "average");
 # Plot the result
 sizeGrWindow(7, 6)
 plot(METree, main = "Clustering of module eigengenes",
     xlab = "", sub = "")
	require(WGCNA)

	# See:
	# https://labs.genetics.ucla.edu/horvath/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/FemaleLiver-02-networkConstr-man.pdf

	SPV_2_name = read.delim("SPV_2_name.txt", header=FALSE)
	colnames(SPV_2_name) = c("SPV", "gene")

	# co-expression matrix from Veening lab
	# https://www.biorxiv.org/content/early/2018/03/22/283739
	# data: https://zenodo.org/record/1157923

	# once
	`18_matrix_long_5` <- read.csv("18_matrix_long_5.csv", stringsAsFactors=FALSE)
	`18_matrix_long_4` <- read.csv("18_matrix_long_4.csv", stringsAsFactors=FALSE)
	`18_matrix_long_3` <- read.csv("18_matrix_long_3.csv", stringsAsFactors=FALSE)
	`18_matrix_long_2` <- read.csv("18_matrix_long_2.csv", stringsAsFactors=FALSE)
	`18_matrix_long_1` <- read.csv("18_matrix_long_1.csv", stringsAsFactors=FALSE)
	all = rbind(`18_matrix_long_1`, `18_matrix_long_2`, `18_matrix_long_3`, `18_matrix_long_4`, `18_matrix_long_5`)
	all_M = matrix(nrow = 2152, ncol = 2152, dimnames=list(origin=unique(all$gene1), dev=unique(all$gene2)))
	all_M[cbind(all$gene1, all$gene2)] = all$correlation
	saveRDS(all_M, "coexpression_matrix.Rds")
	# subsequently
	all_M = readRDS("coexpression_matrix.Rds")

	# Run WGCNA as in tutorial
	powers = c(c(1:10), seq(from = 12, to=20, by=2))
	sft = pickSoftThreshold.fromSimilarity(all_M, powerVector = powers, verbose = 5)

	sizeGrWindow(9, 5)
	par(mfrow = c(1,2))
	cex1 = 0.9

	plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
	xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
	main = paste("Scale independence"))
	text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
	labels=powers,cex=cex1,col="red")
	# this line corresponds to using an R^2 cut-off of h
	abline(h=0.90,col="red")
	# Mean connectivity as a function of the soft-thresholding power
	plot(sft$fitIndices[,1], sft$fitIndices[,5],
	xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
	main = paste("Mean connectivity"))
	text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

	softPower = 5
	adjacency = adjacency.fromSimilarity(all_M, power = softPower)

	# Turn adjacency into topological overlap
	TOM = TOMsimilarity(adjacency);
	dissTOM = 1-TOM

	# Call the hierarchical clustering function
	geneTree = hclust(as.dist(dissTOM), method = "average")
	# Plot the resulting clustering tree (dendrogram)
	sizeGrWindow(12,9)
	plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
	labels = FALSE, hang = 0.04)

	# We like large modules, so we set the minimum module size relatively high:
	minModuleSize = 10;
	# Module identification using dynamic tree cut:
	dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
	deepSplit = 2, pamRespectsDendro = FALSE,
	minClusterSize = minModuleSize);
	table(dynamicMods)

	# Convert numeric lables into colors
	dynamicColors = labels2colors(dynamicMods)
	table(dynamicColors)
	# Plot the dendrogram and colors underneath
	sizeGrWindow(8,6)
	plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
	dendroLabels = FALSE, hang = 0.03,
	addGuide = TRUE, guideHang = 0.05,
	main = "Gene dendrogram and module colors")

	colorOrder = c("grey", standardColors(50))
	moduleLabels = match(dynamicColors, colorOrder)-1
	modules = data.frame(SPV=rownames(all_M), module=moduleLabels)

	labelled_modules = merge(modules, SPV_2_name, by.x = "SPV", by.y = "SPV", all.x = TRUE)
	write.table(labelled_modules, "WGCNA_modules.tsv", quote = F, sep = "\t", row.names = F, col.names = F)

	#### !
	# Can't do this with similarity
	#### !
	# Not run

	# Calculate eigengenes
	MEList = moduleEigengenes(all_M, colors = dynamicColors)
	MEs = MEList$eigengenes
	# Calculate dissimilarity of module eigengenes
	MEDiss = 1-cor(MEs);
	# Cluster module eigengenes
	METree = hclust(as.dist(MEDiss), method = "average");
	# Plot the result
	sizeGrWindow(7, 6)
	plot(METree, main = "Clustering of module eigengenes",
	xlab = "", sub = "")