Skip to content

Instantly share code, notes, and snippets.

@mbk0asis
Last active July 4, 2017 08:14
Show Gist options
  • Save mbk0asis/9f1d2dc9ad9d03bae22483f7214a61d1 to your computer and use it in GitHub Desktop.
Save mbk0asis/9f1d2dc9ad9d03bae22483f7214a61d1 to your computer and use it in GitHub Desktop.
WGCNA ver2
library(WGCNA)
allowWGCNAThreads()
#################################################################################
# Loading expression data
setwd('/home/bio0/00-NGS/TCGA_met/data')
set1 <- read.csv("600samples.cosmic170k.csv", header = T, row.names = 1)
all.set <- read.csv("2400samples.27k.set2.2.csv", header = T, row.names = 1)
#################################################################################
cm<-set1
df<-as.data.frame(cm)
cm <- df[rowMeans(abs(df[-c(1:2)]))>0,]
dim(cm)
datExpr0 = as.data.frame(t(cm))
gsg=goodSamplesGenes(datExpr0, verbose = 10)
gsg$allOK
##################################################################################
sampleTree=hclust(dist(datExpr0),method="average")
sizeGrWindow(12,9)
par(cex=0.6)
par(mar=c(0,4,2,0))
plot(sampleTree,main="Sample clustering to detect outliers",sub = "",
cex=1,cex.axis=1.5,cex.main=2, hang = -1)
#abline(h=200,col="red")
##################################################################################
clust=cutreeStatic(sampleTree, cutHeight = 140, minSize = 1)
table(clust)
keepSamples=(clust==1)
datExpr=datExpr0[keepSamples,]
keepSamples2=(clust==2)
datExpr2=datExpr0[keepSamples2,]
##################################################################################
# Loading trait data
traitData=read.csv('600samples.cosmic170k.traits.csv',header=T)
allTraits=traitData
head(allTraits)
# combine trait and expression data
Samples=rownames(datExpr)
traitRows=match(Samples, allTraits$sample)
datTraits=allTraits[traitRows,-1]
datTraits
##################################################################################
collectGarbage() # cleaning memory
sampleTree2=hclust(dist(datExpr), method="average")
traitColors=numbers2colors(datTraits,signed=F)
plotDendroAndColors(sampleTree2,traitColors,groupLabels=names(datTraits), hang=-1,
main="Sample dendrogram and trait heatmap",cex.dendroLabels = 0.3)
##################################################################################
# one-step (automatic) network construction and module detection
options(stringsAsFactors=FALSE)
enableWGCNAThreads()
#* pick 'POWER' ***********************************************************************************#
powers=c(c(1:10), seq(from=12,to=20,by=2))
sft=pickSoftThreshold(datExpr,powerVector=powers,verbose=5) # soft-threshold (this takes long)
#**************************************************************************************************#
sizeGrWindow(9,5)
par(mfrow=c(1,2))
cex1=0.7
plot(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],xlab="Soft Threshold (power)",
ylab="Scale Free Topology Model Fit,signed R^2",type="n", main=paste("Scale independence"))
text(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")
abline(h=0.8,col="red")
plot(sft$fitIndices[,1],sft$fitIndices[,5],xlab="Soft Threshold (power)",ylab="Mean Connectivity",
type="n",main=paste("Mean Connectivity"))
text(sft$fitIndices[,1],sft$fitIndices[,5],labels=powers,cex=cex1,col="red")
allowWGCNAThreads()
disableWGCNAThreads()
#* module detection *******************************************************************************#
net=blockwiseModules(datExpr, power=18, TOMType="unsigned",minModuleSize=5,reassignThreshold=0,
mergeCutHeight=0.2, numericLabels=T,pamRespectsDendro=F,saveTOMs=T,
saveTOMFileBase="TOM",verbose=3,maxBlockSize = 46000)
#**************************************************************************************************#
jpeg(filename = "All.set_cluster_dendro_set2.jpg", width = 1000, height = 500)
mergedColors=labels2colors(net$colors)
plotDendroAndColors(net$dendrograms[[1]],mergedColors[net$blockGenes[[1]]],"Module colors",
dendroLabels=F,hang=0.03,addGuide=T,guideHang=0.05)
dev.off()
##################################################################################
moduleLabels=net$colors
moduleColors=labels2colors(net$colors)
MEs=net$MEs
geneTree=net$dendrograms[[1]]
##################################################################################
# Relating modules and traits
nGenes=ncol(datExpr)
nSamples=nrow(datExpr)
MEs0=moduleEigengenes(datExpr,moduleColors)$eigengenes
MEs=orderMEs(MEs0)
# to remove "grey" module
MEs = removeGreyME(MEs, greyMEName = paste(moduleColor.getMEprefix(), "grey", sep=""))
moduleTraitCor=cor(MEs, datTraits,use="p")
moduleTraitPvalue=corPvalueStudent(moduleTraitCor, nSamples)
min <- data.frame(apply(moduleTraitCor,1,mean))
colnames(min) <- "pval"
head(data.frame(sort(min[,1])))
tail(data.frame(sort(min[,1])))
##################################################################################
##################################################################################
# SAVE module-trait correlation
write.csv(moduleTraitCor,"All.set_module_trait_cor_170K_power18.csv")
write.csv(moduleTraitPvalue,"All.set_module_trait_pval_170K_power18.csv")
head(moduleTraitCor)
# SAVE the heatmap
jpeg(filename = "All.set_module_trait_cor_170K_power18.jpg", width = 500, height = 1000)
textMatrix=paste(signif(moduleTraitCor,2)) #,"\n('signif(moduleTraitPvalue,1),')",sep="")
dim(textMatrix)=dim(moduleTraitCor)
par(mar=c(3,10,2,2))
labeledHeatmap(Matrix = moduleTraitCor,xLabels = names(datTraits), yLabels = names(MEs),
ySymbols = names(MEs),colorLabels = F,colors=blueWhiteRed(300), #textMatrix=textMatrix,
setStdMargins=F, cex.text = 0.6, cex.lab = .8,
zlim=c(-1,1),main=paste("All.set_module_trait_cor_170K_power18"))
dev.off()
##################################################################################
# GS (gene significance) and MM (module membership)
weight=as.data.frame(datTraits)
names(weight)="group"
modNames=substring(names(MEs),3)
modNames
geneModuleMembership=as.data.frame(cor(datExpr, MEs, use="p"))
MMPvalue=as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership),nSamples))
names(geneModuleMembership)=paste("MM",modNames,sep = "")
names(MMPvalue)=paste("p.MM",modNames,sep="")
geneTraitSignificance=as.data.frame(cor(datExpr,weight,use = "p"))
GSPvalue=as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance),nSamples))
names(geneTraitSignificance)=paste("GS",names(weight),sep="")
names(GSPvalue)=paste("p.GS",names(weight),sep="")
#################################################################################################
# Automatic file/pic export
# prepare a list of modules
# library(gplots)
file <- "module.list.csv" # a list of modules
close(con)
dev.off()
con <- file(description=file, open="r")
while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
print(line)
module=line # if module ID's are in numbers, not colors, run 'label2color'
column=match(module,modNames)
moduleGenes=moduleColors==module
tiff(paste(line,'membership.tif'),units = "px",width = 500, height = 500)
par(mar=c(3,3,3,3))
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes,1]),
xlab=paste("Module Membership in", module, "module"),ylab="Genes",
main=paste("Module membership vs gene significance \n"),
pch=19,cex.main=1,cex.lab = .6,cex.axis = .6,col="grey30",
xlim=c(0,1))
dev.off()
membership<-data.frame(cbind(names(datExpr)[moduleColors==module],
abs(geneModuleMembership[moduleGenes, column])))
colnames(membership) <- c("CpG", "ModuleMembership")
membership <- membership[order(membership$ModuleMembership), ]
nrow(membership)
write.csv(membership,paste(module,"170K.Set1.membership.csv", sep = '.'))
members<-t(datExpr[moduleColors==module])
df<-as.matrix(members)
tiff(paste(line,'heatmap.tif'),units = "px",width = 500, height = 500)
par(mar=c(3,3,3,3))
heatmap.2(df, col=bluered, margins = c(8,8),
main = paste(module,"- 170K.Set1"),
trace = "none", density ="none",
Colv=F,Rowv=T, dendrogram = "row",
ColSideColors=c(rep("grey30",100),rep("grey70",100),
rep("grey30",100),rep("grey70",100),
rep("grey30",100),rep("grey70",100)))
dev.off()
#write.csv(members,paste("members.170K.Set1",module,"csv", sep = '.'))
}
close(con)
########################################################################################
# select module
module="lightyellow" # if module ID's are in numbers, not colors, run 'label2color'
#labels2colors("14")
column=match(module,modNames)
moduleGenes=moduleColors==module
sizeGrWindow(12,9)
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes,1]),
xlab=paste("Module Membership in", module, "module"),ylab="Genes",
main=paste("Module membership vs gene significance \n"),
pch=19,cex.main=1,cex.lab = .6,cex.axis = .6,col=module,
xlim=c(0,1))
#text(abs(geneModuleMembership[moduleGenes, column]),
# abs(geneTraitSignificance[moduleGenes,1]),
# labels=names(datExpr)[moduleColors=="darkturquoise"],
# cex=.7,col="black")
membership<-data.frame(cbind(names(datExpr)[moduleColors=="lightyellow"],
abs(geneModuleMembership[moduleGenes, column])))
colnames(membership) <- c("CpG", "ModuleMembership")
membership <- membership[order(membership$ModuleMembership), ]
nrow(membership)
write.csv(membership,"membership.set2.green.csv")
members<-t(datExpr[moduleColors == "lightyellow"])
df<-as.matrix(members)
#rel_exp<-log2(df+1)-log2(rowMeans(df+1))
library(gplots)
dim(df)
heatmap.2(df, col=bluered, margins = c(8,9),main = "lightyellow",
trace = "none", density ="none",
Colv=F,Rowv=T, dendrogram = "row",
ColSideColors=c(rep("grey30",100),rep("grey70",100),
rep("grey30",100),rep("grey70",100),
rep("grey30",100),rep("grey70",100)))
write.csv(members,"members.set2.green.csv")
noGreyMembers<-t(datExpr[moduleColors != "grey"])
head(noGreyMembers[,c(1:3)])
meanNoGreyMembers <- data.frame(
cbind(rowMeans(noGreyMembers[,c(1:300)]),rowMeans(noGreyMembers[,c(301:600)]),
rowMeans(noGreyMembers[,c(601:900)]),rowMeans(noGreyMembers[,c(901:1200)]),
rowMeans(noGreyMembers[,c(1201:1500)]),rowMeans(noGreyMembers[,c(1501:1800)]),
rowMeans(noGreyMembers[,c(1801:2099)]),rowMeans(noGreyMembers[,c(2100:2399)])))
colnames(meanNoGreyMembers)<-c("bladder","breast","cerivx","colon","liver","lung.ad","lung.sc","prostate")
head(meanNoGreyMembers)
stripchart(meanNoGreyMembers, method='jitter', vertical=T, pch=20,cex=.9,
col=(c(rgb(.8,.8,.3,.7),rgb(.3,.8,.8,.7))),axes = F,
jitter=.4, ylim=c(-0.1, 1.1))
b<-boxplot(meanNoGreyMembers, las=2, col=(c(rgb(.8,.3,.3,.5),rgb(.3,.3,.8,.5))), add = T,
outline=F, xaxt="n")
lines(b$stats[3,], lwd = 2, lty = 5, type = "o")
# bootstrapping
library(boot)
meanFunc <- function(x,i){mean(x[i])}
results <- boot(meanNoGreyMembers,meanFunc,10000)
summary(results$t)
boot.ci(boot.out = results, conf = 0.95, type="bca")
###############################################################################################################
########
# STOP
# STOP
# STOP
# STOP
# STOP
# STOP
# STOP
########
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment