Last active
July 4, 2017 08:14
-
-
Save mbk0asis/9f1d2dc9ad9d03bae22483f7214a61d1 to your computer and use it in GitHub Desktop.
WGCNA ver2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(WGCNA) | |
| allowWGCNAThreads() | |
| ################################################################################# | |
| # Loading expression data | |
| setwd('/home/bio0/00-NGS/TCGA_met/data') | |
| set1 <- read.csv("600samples.cosmic170k.csv", header = T, row.names = 1) | |
| all.set <- read.csv("2400samples.27k.set2.2.csv", header = T, row.names = 1) | |
| ################################################################################# | |
| cm<-set1 | |
| df<-as.data.frame(cm) | |
| cm <- df[rowMeans(abs(df[-c(1:2)]))>0,] | |
| dim(cm) | |
| datExpr0 = as.data.frame(t(cm)) | |
| gsg=goodSamplesGenes(datExpr0, verbose = 10) | |
| gsg$allOK | |
| ################################################################################## | |
| sampleTree=hclust(dist(datExpr0),method="average") | |
| sizeGrWindow(12,9) | |
| par(cex=0.6) | |
| par(mar=c(0,4,2,0)) | |
| plot(sampleTree,main="Sample clustering to detect outliers",sub = "", | |
| cex=1,cex.axis=1.5,cex.main=2, hang = -1) | |
| #abline(h=200,col="red") | |
| ################################################################################## | |
| clust=cutreeStatic(sampleTree, cutHeight = 140, minSize = 1) | |
| table(clust) | |
| keepSamples=(clust==1) | |
| datExpr=datExpr0[keepSamples,] | |
| keepSamples2=(clust==2) | |
| datExpr2=datExpr0[keepSamples2,] | |
| ################################################################################## | |
| # Loading trait data | |
| traitData=read.csv('600samples.cosmic170k.traits.csv',header=T) | |
| allTraits=traitData | |
| head(allTraits) | |
| # combine trait and expression data | |
| Samples=rownames(datExpr) | |
| traitRows=match(Samples, allTraits$sample) | |
| datTraits=allTraits[traitRows,-1] | |
| datTraits | |
| ################################################################################## | |
| collectGarbage() # cleaning memory | |
| sampleTree2=hclust(dist(datExpr), method="average") | |
| traitColors=numbers2colors(datTraits,signed=F) | |
| plotDendroAndColors(sampleTree2,traitColors,groupLabels=names(datTraits), hang=-1, | |
| main="Sample dendrogram and trait heatmap",cex.dendroLabels = 0.3) | |
| ################################################################################## | |
| # one-step (automatic) network construction and module detection | |
| options(stringsAsFactors=FALSE) | |
| enableWGCNAThreads() | |
| #* pick 'POWER' ***********************************************************************************# | |
| powers=c(c(1:10), seq(from=12,to=20,by=2)) | |
| sft=pickSoftThreshold(datExpr,powerVector=powers,verbose=5) # soft-threshold (this takes long) | |
| #**************************************************************************************************# | |
| sizeGrWindow(9,5) | |
| par(mfrow=c(1,2)) | |
| cex1=0.7 | |
| plot(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],xlab="Soft Threshold (power)", | |
| ylab="Scale Free Topology Model Fit,signed R^2",type="n", main=paste("Scale independence")) | |
| text(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2], | |
| labels=powers,cex=cex1,col="red") | |
| abline(h=0.8,col="red") | |
| plot(sft$fitIndices[,1],sft$fitIndices[,5],xlab="Soft Threshold (power)",ylab="Mean Connectivity", | |
| type="n",main=paste("Mean Connectivity")) | |
| text(sft$fitIndices[,1],sft$fitIndices[,5],labels=powers,cex=cex1,col="red") | |
| allowWGCNAThreads() | |
| disableWGCNAThreads() | |
| #* module detection *******************************************************************************# | |
| net=blockwiseModules(datExpr, power=18, TOMType="unsigned",minModuleSize=5,reassignThreshold=0, | |
| mergeCutHeight=0.2, numericLabels=T,pamRespectsDendro=F,saveTOMs=T, | |
| saveTOMFileBase="TOM",verbose=3,maxBlockSize = 46000) | |
| #**************************************************************************************************# | |
| jpeg(filename = "All.set_cluster_dendro_set2.jpg", width = 1000, height = 500) | |
| mergedColors=labels2colors(net$colors) | |
| plotDendroAndColors(net$dendrograms[[1]],mergedColors[net$blockGenes[[1]]],"Module colors", | |
| dendroLabels=F,hang=0.03,addGuide=T,guideHang=0.05) | |
| dev.off() | |
| ################################################################################## | |
| moduleLabels=net$colors | |
| moduleColors=labels2colors(net$colors) | |
| MEs=net$MEs | |
| geneTree=net$dendrograms[[1]] | |
| ################################################################################## | |
| # Relating modules and traits | |
| nGenes=ncol(datExpr) | |
| nSamples=nrow(datExpr) | |
| MEs0=moduleEigengenes(datExpr,moduleColors)$eigengenes | |
| MEs=orderMEs(MEs0) | |
| # to remove "grey" module | |
| MEs = removeGreyME(MEs, greyMEName = paste(moduleColor.getMEprefix(), "grey", sep="")) | |
| moduleTraitCor=cor(MEs, datTraits,use="p") | |
| moduleTraitPvalue=corPvalueStudent(moduleTraitCor, nSamples) | |
| min <- data.frame(apply(moduleTraitCor,1,mean)) | |
| colnames(min) <- "pval" | |
| head(data.frame(sort(min[,1]))) | |
| tail(data.frame(sort(min[,1]))) | |
| ################################################################################## | |
| ################################################################################## | |
| # SAVE module-trait correlation | |
| write.csv(moduleTraitCor,"All.set_module_trait_cor_170K_power18.csv") | |
| write.csv(moduleTraitPvalue,"All.set_module_trait_pval_170K_power18.csv") | |
| head(moduleTraitCor) | |
| # SAVE the heatmap | |
| jpeg(filename = "All.set_module_trait_cor_170K_power18.jpg", width = 500, height = 1000) | |
| textMatrix=paste(signif(moduleTraitCor,2)) #,"\n('signif(moduleTraitPvalue,1),')",sep="") | |
| dim(textMatrix)=dim(moduleTraitCor) | |
| par(mar=c(3,10,2,2)) | |
| labeledHeatmap(Matrix = moduleTraitCor,xLabels = names(datTraits), yLabels = names(MEs), | |
| ySymbols = names(MEs),colorLabels = F,colors=blueWhiteRed(300), #textMatrix=textMatrix, | |
| setStdMargins=F, cex.text = 0.6, cex.lab = .8, | |
| zlim=c(-1,1),main=paste("All.set_module_trait_cor_170K_power18")) | |
| dev.off() | |
| ################################################################################## | |
| # GS (gene significance) and MM (module membership) | |
| weight=as.data.frame(datTraits) | |
| names(weight)="group" | |
| modNames=substring(names(MEs),3) | |
| modNames | |
| geneModuleMembership=as.data.frame(cor(datExpr, MEs, use="p")) | |
| MMPvalue=as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership),nSamples)) | |
| names(geneModuleMembership)=paste("MM",modNames,sep = "") | |
| names(MMPvalue)=paste("p.MM",modNames,sep="") | |
| geneTraitSignificance=as.data.frame(cor(datExpr,weight,use = "p")) | |
| GSPvalue=as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance),nSamples)) | |
| names(geneTraitSignificance)=paste("GS",names(weight),sep="") | |
| names(GSPvalue)=paste("p.GS",names(weight),sep="") | |
| ################################################################################################# | |
| # Automatic file/pic export | |
| # prepare a list of modules | |
| # library(gplots) | |
| file <- "module.list.csv" # a list of modules | |
| close(con) | |
| dev.off() | |
| con <- file(description=file, open="r") | |
| while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) { | |
| print(line) | |
| module=line # if module ID's are in numbers, not colors, run 'label2color' | |
| column=match(module,modNames) | |
| moduleGenes=moduleColors==module | |
| tiff(paste(line,'membership.tif'),units = "px",width = 500, height = 500) | |
| par(mar=c(3,3,3,3)) | |
| verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]), | |
| abs(geneTraitSignificance[moduleGenes,1]), | |
| xlab=paste("Module Membership in", module, "module"),ylab="Genes", | |
| main=paste("Module membership vs gene significance \n"), | |
| pch=19,cex.main=1,cex.lab = .6,cex.axis = .6,col="grey30", | |
| xlim=c(0,1)) | |
| dev.off() | |
| membership<-data.frame(cbind(names(datExpr)[moduleColors==module], | |
| abs(geneModuleMembership[moduleGenes, column]))) | |
| colnames(membership) <- c("CpG", "ModuleMembership") | |
| membership <- membership[order(membership$ModuleMembership), ] | |
| nrow(membership) | |
| write.csv(membership,paste(module,"170K.Set1.membership.csv", sep = '.')) | |
| members<-t(datExpr[moduleColors==module]) | |
| df<-as.matrix(members) | |
| tiff(paste(line,'heatmap.tif'),units = "px",width = 500, height = 500) | |
| par(mar=c(3,3,3,3)) | |
| heatmap.2(df, col=bluered, margins = c(8,8), | |
| main = paste(module,"- 170K.Set1"), | |
| trace = "none", density ="none", | |
| Colv=F,Rowv=T, dendrogram = "row", | |
| ColSideColors=c(rep("grey30",100),rep("grey70",100), | |
| rep("grey30",100),rep("grey70",100), | |
| rep("grey30",100),rep("grey70",100))) | |
| dev.off() | |
| #write.csv(members,paste("members.170K.Set1",module,"csv", sep = '.')) | |
| } | |
| close(con) | |
| ######################################################################################## | |
| # select module | |
| module="lightyellow" # if module ID's are in numbers, not colors, run 'label2color' | |
| #labels2colors("14") | |
| column=match(module,modNames) | |
| moduleGenes=moduleColors==module | |
| sizeGrWindow(12,9) | |
| verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]), | |
| abs(geneTraitSignificance[moduleGenes,1]), | |
| xlab=paste("Module Membership in", module, "module"),ylab="Genes", | |
| main=paste("Module membership vs gene significance \n"), | |
| pch=19,cex.main=1,cex.lab = .6,cex.axis = .6,col=module, | |
| xlim=c(0,1)) | |
| #text(abs(geneModuleMembership[moduleGenes, column]), | |
| # abs(geneTraitSignificance[moduleGenes,1]), | |
| # labels=names(datExpr)[moduleColors=="darkturquoise"], | |
| # cex=.7,col="black") | |
| membership<-data.frame(cbind(names(datExpr)[moduleColors=="lightyellow"], | |
| abs(geneModuleMembership[moduleGenes, column]))) | |
| colnames(membership) <- c("CpG", "ModuleMembership") | |
| membership <- membership[order(membership$ModuleMembership), ] | |
| nrow(membership) | |
| write.csv(membership,"membership.set2.green.csv") | |
| members<-t(datExpr[moduleColors == "lightyellow"]) | |
| df<-as.matrix(members) | |
| #rel_exp<-log2(df+1)-log2(rowMeans(df+1)) | |
| library(gplots) | |
| dim(df) | |
| heatmap.2(df, col=bluered, margins = c(8,9),main = "lightyellow", | |
| trace = "none", density ="none", | |
| Colv=F,Rowv=T, dendrogram = "row", | |
| ColSideColors=c(rep("grey30",100),rep("grey70",100), | |
| rep("grey30",100),rep("grey70",100), | |
| rep("grey30",100),rep("grey70",100))) | |
| write.csv(members,"members.set2.green.csv") | |
| noGreyMembers<-t(datExpr[moduleColors != "grey"]) | |
| head(noGreyMembers[,c(1:3)]) | |
| meanNoGreyMembers <- data.frame( | |
| cbind(rowMeans(noGreyMembers[,c(1:300)]),rowMeans(noGreyMembers[,c(301:600)]), | |
| rowMeans(noGreyMembers[,c(601:900)]),rowMeans(noGreyMembers[,c(901:1200)]), | |
| rowMeans(noGreyMembers[,c(1201:1500)]),rowMeans(noGreyMembers[,c(1501:1800)]), | |
| rowMeans(noGreyMembers[,c(1801:2099)]),rowMeans(noGreyMembers[,c(2100:2399)]))) | |
| colnames(meanNoGreyMembers)<-c("bladder","breast","cerivx","colon","liver","lung.ad","lung.sc","prostate") | |
| head(meanNoGreyMembers) | |
| stripchart(meanNoGreyMembers, method='jitter', vertical=T, pch=20,cex=.9, | |
| col=(c(rgb(.8,.8,.3,.7),rgb(.3,.8,.8,.7))),axes = F, | |
| jitter=.4, ylim=c(-0.1, 1.1)) | |
| b<-boxplot(meanNoGreyMembers, las=2, col=(c(rgb(.8,.3,.3,.5),rgb(.3,.3,.8,.5))), add = T, | |
| outline=F, xaxt="n") | |
| lines(b$stats[3,], lwd = 2, lty = 5, type = "o") | |
| # bootstrapping | |
| library(boot) | |
| meanFunc <- function(x,i){mean(x[i])} | |
| results <- boot(meanNoGreyMembers,meanFunc,10000) | |
| summary(results$t) | |
| boot.ci(boot.out = results, conf = 0.95, type="bca") | |
| ############################################################################################################### | |
| ######## | |
| # STOP | |
| # STOP | |
| # STOP | |
| # STOP | |
| # STOP | |
| # STOP | |
| # STOP | |
| ######## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment