Last active
May 17, 2018 07:13
-
-
Save mbk0asis/c7f5eee735a5d5ed4f8bc6c88cc99062 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #source("https://bioconductor.org/biocLite.R") | |
| #biocLite("org.Mm.eg.db") | |
| library("DESeq2") | |
| library("calibrate") | |
| library("pcaExplorer") | |
| library("ggplot2") | |
| library("gplots") | |
| library("ggrepel") | |
| library("gage") | |
| library("AnnotationDbi") | |
| library("org.Mm.eg.db") | |
| setwd("~/BIO5/RNAseq_Mouse_Muscle_Age_HD") | |
| #type="genes" | |
| type="L1.ERV" | |
| cnts <- read.csv(paste("All.",type,".counts.csv",sep=""), header=T,row.names = 1) | |
| dim(cnts) | |
| ######################################### | |
| #cnts_noZero <- cnts[rowMeans(cnts) >= 0,] | |
| #dim(cnts_noZero) | |
| ######################################### | |
| conds <- c(rep("HD_02m",3),rep("HD_16m",3),rep("WT_02m",3),rep("WT_20m",3),rep("WT_28m",3)) | |
| coldat=DataFrame(conds=factor(conds)) | |
| coldat | |
| dds <- DESeqDataSetFromMatrix(cnts[,1:15], colData=coldat, design = ~ conds) | |
| dds <- DESeq(dds, fitType = "mean") | |
| dds | |
| # to obtain normalized count data | |
| cntNorm <- counts(dds, normalized=T) | |
| head(cntNorm) | |
| write.csv(cntNorm,paste("cntNorm_",type,".csv",sep="")) | |
| #tiff(paste(type,"_Boxplot_count_distribution.tif",sep=""),units = "px",width = 600, height = 400) | |
| #par(mfrow=c(1,2)) | |
| #boxplot(log10(cnts), main = "raw counts", | |
| # col=c(rep("red",3),rep("yellow",3),rep("blue",3),rep("cyan",3),rep("darkblue",4))) | |
| #boxplot(log10(cntNorm), main = "cntNorm", | |
| # col=c(rep("red",3),rep("yellow",3),rep("blue",3),rep("cyan",3),rep("darkblue",4))) | |
| #dev.off() | |
| ######################################### | |
| ######################################### | |
| mean_HD_02m <- rowMeans(cntNorm[,1:3]) | |
| mean_HD_16m <- rowMeans(cntNorm[,4:6]) | |
| mean_WT_02m <- rowMeans(cntNorm[,7:9]) | |
| mean_WT_20m <- rowMeans(cntNorm[,10:12]) | |
| mean_WT_28m <- rowMeans(cntNorm[,13:15]) | |
| ######################################### | |
| ######################################### | |
| group1="WT_28m" | |
| mean_group1 <- mean_WT_28m | |
| group2="WT_20m" | |
| mean_group2 <- mean_WT_20m | |
| res <- results(dds, contrast = c("conds", group1, group2)) | |
| # add group mean expression level | |
| res$group1_name <- group1 | |
| res$mean_group1 <- mean_group1 | |
| res$group2_name <- group2 | |
| res$mean_group2 <- mean_group2 | |
| columns("org.Mm.eg.db") | |
| # add gene symbol to res | |
| res$entrez = mapIds(org.Mm.eg.db, | |
| keys=row.names(res), | |
| column="ENTREZID", | |
| keytype="SYMBOL", | |
| multiVals="first") | |
| # add entrez ID to res | |
| res$refseq = mapIds(org.Mm.eg.db, | |
| keys=row.names(res), | |
| column="REFSEQ", | |
| keytype="SYMBOL", | |
| multiVals="first") | |
| head(res) | |
| write.csv(res,paste(type,"_DEGs_",group1,"_vs_",group2,".csv",sep="")) | |
| res_sig <- subset(res, pvalue <= 0.05 & abs(log2FoldChange) >= 1) # sig DEGs, p < 0.05, fc > 2 | |
| res_sig <- res_sig[order(-res_sig$log2FoldChange),] # sort by fold change | |
| write.csv(res_sig,paste(type,"_sig_DEGs_",group1,"_vs_",group2,".csv",sep="")) | |
| ################################################### | |
| nrow1 <- nrow(subset(res, pvalue <= 0.05 & log2FoldChange >= 1)) | |
| nrow2 <- nrow(subset(res, pvalue <= 0.05 & log2FoldChange <= -1)) | |
| res$log2FoldChange[abs(res$log2FoldChange) >=10] <- 10 # replace (foldChange > 10) to 10 | |
| res$pvalue[res$pvalue <= 1e-15] <- 1e-15 # replace (pvalue < 1e-15) to 1e-15 | |
| ################################################### | |
| # Volcano plot - DEGs | |
| tiff(paste(type,"_Volcano_",group1,"_vs_",group2,".tif",sep=""),units = "px",width = 350, height = 350) | |
| group1 | |
| nrow1 | |
| group2 | |
| nrow2 | |
| with(res, plot(log2FoldChange,-log10(pvalue),pch=19,cex=.8, | |
| col=rgb(.5,.5,.5,.5), | |
| main=paste(group2," (",nrow2,") < > ",group1," (",nrow1,")",sep=""))) | |
| with(subset(res, pvalue<=0.05 & log2FoldChange>=1), | |
| points(log2FoldChange,-log10(pvalue),pch=19,cex=.7,col=rgb(1,.2,.2,.5))) | |
| with(subset(res, pvalue<=0.05 & log2FoldChange<=-1), | |
| points(log2FoldChange,-log10(pvalue),pch=19,cex=.7,col=rgb(.2,.6,.2,.4))) | |
| abline(h=c(-log10(0.05)),col="blue",lty=2) | |
| abline(v=c(-1,1),col="blue",lty=2) | |
| abline(v=0,col="blue") | |
| dev.off() | |
| ######################################################## | |
| ######################################################## | |
| ######################################################## | |
| # | |
| # UNDER CONSTRUCTION | |
| # | |
| ################################################### | |
| # GAGE | |
| # Gene Ontology | |
| ## load GO gene sets | |
| #for Bioconductor species supported by go.gsets function: | |
| data(bods) | |
| print(bods) | |
| go.mm=go.gsets(species="Mouse",id.type = "eg") | |
| go.bp=go.mm$go.sets[go.mm$go.subs$BP] | |
| go.cc=go.mm$go.sets[go.mm$go.subs$CC] | |
| go.mf=go.mm$go.sets[go.mm$go.subs$MF] | |
| ## prepare fold-change dataset | |
| foldchanges = res$log2FoldChange | |
| names(foldchanges)=res$entrez | |
| ## run gage | |
| go.p<-gage(foldchanges, gsets=go.bp, same.dir = T, compare='unpaired') | |
| ## Look at both up (greater), down (less), and statatistics. | |
| lapply(go.p, head) | |
| ## Get the GO terms | |
| topN=10 # export top '10' terms | |
| go.terms = data.frame(id=rownames(go.p$greater), go.p$greater) %>% | |
| tbl_df() %>% | |
| filter(row_number()<=topN) %>% | |
| .$id %>% | |
| as.character() | |
| go.terms | |
| go.terms.names = substr(go.terms, start=1, stop=8) | |
| go.terms.names | |
| go.p.sig<-sigGeneSet(go.p, outname="SETDB1.go.bp") | |
| str(go.p.sig, strict.width='wrap') | |
| go.p.2d.sig<-sigGeneSet(go.p, outname="gse16873.kegg") | |
| greater <- go.p$greater | |
| greater_sig <- subset(greater, greater[,4] <= 0.05) | |
| head(greater_sig) | |
| write.csv(greater_sig,"gage_GO_BP_ampl_dipl_up_q0.05.csv") | |
| less <- go.p$less | |
| less_sig <- subset(less, less[,4] <= 0.05) | |
| dim(less_sig) | |
| write.csv(less_sig,"gage_GO_BP_ampl_dipl_down_q0.05.csv") | |
| #### | |
| go.p.ids = substr(keggrespathways, start=1, stop=8) | |
| go.p.ids | |
| ############################################################## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment