Skip to content

Instantly share code, notes, and snippets.

@mbk0asis
Last active May 17, 2018 07:13
Show Gist options
  • Select an option

  • Save mbk0asis/c7f5eee735a5d5ed4f8bc6c88cc99062 to your computer and use it in GitHub Desktop.

Select an option

Save mbk0asis/c7f5eee735a5d5ed4f8bc6c88cc99062 to your computer and use it in GitHub Desktop.
#source("https://bioconductor.org/biocLite.R")
#biocLite("org.Mm.eg.db")
library("DESeq2")
library("calibrate")
library("pcaExplorer")
library("ggplot2")
library("gplots")
library("ggrepel")
library("gage")
library("AnnotationDbi")
library("org.Mm.eg.db")
setwd("~/BIO5/RNAseq_Mouse_Muscle_Age_HD")
#type="genes"
type="L1.ERV"
cnts <- read.csv(paste("All.",type,".counts.csv",sep=""), header=T,row.names = 1)
dim(cnts)
#########################################
#cnts_noZero <- cnts[rowMeans(cnts) >= 0,]
#dim(cnts_noZero)
#########################################
conds <- c(rep("HD_02m",3),rep("HD_16m",3),rep("WT_02m",3),rep("WT_20m",3),rep("WT_28m",3))
coldat=DataFrame(conds=factor(conds))
coldat
dds <- DESeqDataSetFromMatrix(cnts[,1:15], colData=coldat, design = ~ conds)
dds <- DESeq(dds, fitType = "mean")
dds
# to obtain normalized count data
cntNorm <- counts(dds, normalized=T)
head(cntNorm)
write.csv(cntNorm,paste("cntNorm_",type,".csv",sep=""))
#tiff(paste(type,"_Boxplot_count_distribution.tif",sep=""),units = "px",width = 600, height = 400)
#par(mfrow=c(1,2))
#boxplot(log10(cnts), main = "raw counts",
# col=c(rep("red",3),rep("yellow",3),rep("blue",3),rep("cyan",3),rep("darkblue",4)))
#boxplot(log10(cntNorm), main = "cntNorm",
# col=c(rep("red",3),rep("yellow",3),rep("blue",3),rep("cyan",3),rep("darkblue",4)))
#dev.off()
#########################################
#########################################
mean_HD_02m <- rowMeans(cntNorm[,1:3])
mean_HD_16m <- rowMeans(cntNorm[,4:6])
mean_WT_02m <- rowMeans(cntNorm[,7:9])
mean_WT_20m <- rowMeans(cntNorm[,10:12])
mean_WT_28m <- rowMeans(cntNorm[,13:15])
#########################################
#########################################
group1="WT_28m"
mean_group1 <- mean_WT_28m
group2="WT_20m"
mean_group2 <- mean_WT_20m
res <- results(dds, contrast = c("conds", group1, group2))
# add group mean expression level
res$group1_name <- group1
res$mean_group1 <- mean_group1
res$group2_name <- group2
res$mean_group2 <- mean_group2
columns("org.Mm.eg.db")
# add gene symbol to res
res$entrez = mapIds(org.Mm.eg.db,
keys=row.names(res),
column="ENTREZID",
keytype="SYMBOL",
multiVals="first")
# add entrez ID to res
res$refseq = mapIds(org.Mm.eg.db,
keys=row.names(res),
column="REFSEQ",
keytype="SYMBOL",
multiVals="first")
head(res)
write.csv(res,paste(type,"_DEGs_",group1,"_vs_",group2,".csv",sep=""))
res_sig <- subset(res, pvalue <= 0.05 & abs(log2FoldChange) >= 1) # sig DEGs, p < 0.05, fc > 2
res_sig <- res_sig[order(-res_sig$log2FoldChange),] # sort by fold change
write.csv(res_sig,paste(type,"_sig_DEGs_",group1,"_vs_",group2,".csv",sep=""))
###################################################
nrow1 <- nrow(subset(res, pvalue <= 0.05 & log2FoldChange >= 1))
nrow2 <- nrow(subset(res, pvalue <= 0.05 & log2FoldChange <= -1))
res$log2FoldChange[abs(res$log2FoldChange) >=10] <- 10 # replace (foldChange > 10) to 10
res$pvalue[res$pvalue <= 1e-15] <- 1e-15 # replace (pvalue < 1e-15) to 1e-15
###################################################
# Volcano plot - DEGs
tiff(paste(type,"_Volcano_",group1,"_vs_",group2,".tif",sep=""),units = "px",width = 350, height = 350)
group1
nrow1
group2
nrow2
with(res, plot(log2FoldChange,-log10(pvalue),pch=19,cex=.8,
col=rgb(.5,.5,.5,.5),
main=paste(group2," (",nrow2,") < > ",group1," (",nrow1,")",sep="")))
with(subset(res, pvalue<=0.05 & log2FoldChange>=1),
points(log2FoldChange,-log10(pvalue),pch=19,cex=.7,col=rgb(1,.2,.2,.5)))
with(subset(res, pvalue<=0.05 & log2FoldChange<=-1),
points(log2FoldChange,-log10(pvalue),pch=19,cex=.7,col=rgb(.2,.6,.2,.4)))
abline(h=c(-log10(0.05)),col="blue",lty=2)
abline(v=c(-1,1),col="blue",lty=2)
abline(v=0,col="blue")
dev.off()
########################################################
########################################################
########################################################
#
# UNDER CONSTRUCTION
#
###################################################
# GAGE
# Gene Ontology
## load GO gene sets
#for Bioconductor species supported by go.gsets function:
data(bods)
print(bods)
go.mm=go.gsets(species="Mouse",id.type = "eg")
go.bp=go.mm$go.sets[go.mm$go.subs$BP]
go.cc=go.mm$go.sets[go.mm$go.subs$CC]
go.mf=go.mm$go.sets[go.mm$go.subs$MF]
## prepare fold-change dataset
foldchanges = res$log2FoldChange
names(foldchanges)=res$entrez
## run gage
go.p<-gage(foldchanges, gsets=go.bp, same.dir = T, compare='unpaired')
## Look at both up (greater), down (less), and statatistics.
lapply(go.p, head)
## Get the GO terms
topN=10 # export top '10' terms
go.terms = data.frame(id=rownames(go.p$greater), go.p$greater) %>%
tbl_df() %>%
filter(row_number()<=topN) %>%
.$id %>%
as.character()
go.terms
go.terms.names = substr(go.terms, start=1, stop=8)
go.terms.names
go.p.sig<-sigGeneSet(go.p, outname="SETDB1.go.bp")
str(go.p.sig, strict.width='wrap')
go.p.2d.sig<-sigGeneSet(go.p, outname="gse16873.kegg")
greater <- go.p$greater
greater_sig <- subset(greater, greater[,4] <= 0.05)
head(greater_sig)
write.csv(greater_sig,"gage_GO_BP_ampl_dipl_up_q0.05.csv")
less <- go.p$less
less_sig <- subset(less, less[,4] <= 0.05)
dim(less_sig)
write.csv(less_sig,"gage_GO_BP_ampl_dipl_down_q0.05.csv")
####
go.p.ids = substr(keggrespathways, start=1, stop=8)
go.p.ids
##############################################################
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment