Skip to content

Instantly share code, notes, and snippets.

@mbk0asis
Last active September 1, 2016 10:14
Show Gist options
  • Save mbk0asis/12fc787cf52bcd2a0e39e8f48f2dbc9e to your computer and use it in GitHub Desktop.
Save mbk0asis/12fc787cf52bcd2a0e39e8f48f2dbc9e to your computer and use it in GitHub Desktop.
#source("http://bioconductor.org/biocLite.R")
#biocLite(c("DESeq2","gplots","pcaExplorer","bovine.db"))
#biocLite("calibrate")
#biocLite("AnnotationFuncs")
library(DESeq2)
#library(pcaExplorer)
#library(bovine.db)
library(ggplot2)
library(gplots)
library(ggrepel)
library(gage)
setwd("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq")
wd<-"/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq"
cnts_symbol=read.table("Total.counts",row.names = 1, header=T)
cnts_entrez=read.csv("Total.counts.entrez",row.names = 1, header=F)
colnames(cnts_symbol)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7","ii8","ii9","ii10","ii11","ii12","ii13","ii14","ii15","ii16","ii17","ii18","ii19","ii20","ii21",
"ti1","ti2","ti3","ti4","ti5","ti6","ti7","ti8","ti9","ti10","ti11","ti12","ti13","ti14","ti15","ti16","ti17","ti18","ti19","ti20","ti21",
"in1","in2","in3","in4","in5","in6","in7","in8","in9","in10","in11","in12","in13","in14","in15",
"tn1","tn2","tn3","tn4","tn5","tn6","tn7","tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15")
head(cnts_entrez)
# "Total.counts" contains 6 rows of statistical summary of each column
# if haven't remove them, run following
delRow <- nrow(cnts_symbol)-6
cnts_symbol <- head(cnts_symbol,delRow)
#cnts_symbol <- cnts_symbol[,1:57]
# To remove all zero rows
df<-as.data.frame(cnts_symbol)
cnts_symbol_noZero <- df[rowMeans(abs(df[-c(1:2)]))>0,]
#cnts_symbol_noZero<-cnts_symbol_noZero[,1:24]
dim(cnts_symbol_noZero)
head(cnts_symbol_noZero)
conds <- c(rep("ICMi",21)
,rep("TEi",21)
,rep("ICMn",15)
,rep("TEn",15))
coldat=DataFrame(conds=factor(conds))
coldat
dds <- DESeqDataSetFromMatrix(cnts_symbol, colData=coldat, design = ~ conds)
dds <- DESeq(dds)
dds
# to obtain normalized count data
cntNorm_sym <- counts(dds, normalized=T)
dim(cntNorm_sym)
cntNorm_ent <- counts(dds, normalized=T)
head(cntNorm_ent)
boxplot(log2(cntNorm_sym+1), notch=F, boxonly=F, las=2, cex=0.2,
col = c(rep("red",21),rep("palevioletred1",21),
rep("blue2",15),
rep("royalblue2",15),rep("sienna",5))
,outline = F,#ylim=c(0,100),
ylab = "log2 counts", main = "raw cnts")
# PCA plot
rld <- rlogTransformation(dds)
pcaplot(rld,intgroup=c("conds"), ntop=10000, text_labels=F, title="PCA",
pcX=1,pcY=2,point_size=3)
vsd <- varianceStabilizingTransformation(dds)
pcaplot(vsd,intgroup=c("conds"), ntop=20000, text_labels=F, title="PCA",
pcX=1,pcY=2,point_size=3)
# 3D PCA
df<-as.data.frame(cntNorm_sym)
log.pl<-log(df+1)
t.log.pl<-t(log.pl)
pr<-prcomp(t.log.pl) # PCA
plot(pr,type="lines")
#biplot(pr,cex=0.7)
library(rgl)
colnames(df) <- c(rep("ii",21),rep("ti",21),rep("in",15),rep("tn",15))
groups <- rownames(t(df))
levs<- levels(groups)
group.col <-c("cyan","darkblue","hotpink","darkred")
plot3d(pr$x[,1:3], size=2, type="s",box=F,
xlab = "",ylab = "",zlab = "",#col=rainbow(72))
col=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15)))
rgl.bbox(color=c("#3333FF","black"), emission="#3333FF",
specular="#3333FF", shininess=1, alpha=0.8, nticks = 3 )
aspect3d(1,1,1)
writeWebGL(dir=wd, "3d_PCA_cntNorm.html")
rgl.postscript("3d_pca3.pdf","pdf")
#
# Correlation Analysis
chrX_genes<-read.csv("X_genes", row.names=1, header=F)
colnames(chrX_genes)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7",
"ii8","ii9","ii10","ii11","ii12","ii13","ii14",
"ii15","ii16","ii17","ii18","ii19","ii20","ii21",
"ti1","ti2","ti3","ti4","ti5","ti6","ti7",
"ti8","ti9","ti10","ti11","ti12","ti13","ti14",
"ti15","ti16","ti17","ti18","ti19","ti20","ti21",
"in1","in2","in3","in4","in5","in6","in7",
"in8","in9","in10","in11","in12","in13","in14","in15",
"tn1","tn2","tn3","tn4","tn5","tn6","tn7",
"tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15")
#chrX_genes<-chrX_genes[,c(22:42,58:72)]
head(chrX_genes)
Auto_genes<-read.csv("Auto_genes",row.names = 1)
colnames(Auto_genes)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7",
"ii8","ii9","ii10","ii11","ii12","ii13","ii14",
"ii15","ii16","ii17","ii18","ii19","ii20","ii21",
"ti1","ti2","ti3","ti4","ti5","ti6","ti7",
"ti8","ti9","ti10","ti11","ti12","ti13","ti14",
"ti15","ti16","ti17","ti18","ti19","ti20","ti21",
"in1","in2","in3","in4","in5","in6","in7",
"in8","in9","in10","in11","in12","in13","in14","in15",
"tn1","tn2","tn3","tn4","tn5","tn6","tn7",
"tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15")
head(Auto_genes)
#Auto_genes<-Auto_genes[,c(22:42,58:72)]
# to calculate group mean
cn<- as.data.frame(chrX_genes)
vars<-c("ii","ti","in","tn")
grp_mean = sapply(vars,function(x) rowMeans(cn[,grep(x,names(cn))]))
head(grp_mean)
dta.cor <- round(cor(grp_mean,method="pearson"),2)
head(dta.cor)
heatmap.2(dta.cor,trace="none",col=bluered(256),density="none",
cellnote = dta.cor,notecol="black",notecex=3,
Rowv = F,Colv = F, dendrogram="none",
key.title="Pearson",key.xlab = NULL,key.ylab = NULL)
#,
RowSideColors=c(rep("cyan",21),
rep("darkblue",21),
rep("hotpink",15),
rep("darkred",15)),
ColSideColors=c(rep("cyan",21),
rep("darkblue",21),
rep("hotpink",15),
rep("darkred",15)))
#
hc<-hclust(dist(dta.cor))
plot(hc,hang=-1,main="Pearson")
# DE analysis
res <- results(dds, contrast = c("conds","ICMn","TEn"))
res_sig <- subset(res, padj<.01 & abs(log2FoldChange)>1)
summary(res_sig)
write.table(res_sig,"DEG_ti_tn")
# volcano plot
#res$padj[-log10(res$padj) > 10] <- 10 # outlier cutoff
#res$log2FoldChange[abs(res$log2FoldChange) > 5] <- 5 # outlier cutoff
with(res, plot(log2FoldChange,-log10(padj),pch=19,cex=0.5,ylim=c(-1,20),main="ICMn vs TEn",col="lightgrey"))
with(subset(res, padj<0.01 & abs(log2FoldChange)>1),points(log2FoldChange,-log10(padj),pch=20,cex=0.5,col="red"))
abline(v=c(-1,1),col="blue",lty=2)
abline(v=0,col="blue")
abline(h=(-log10(0.01)),col="blue",lty=2)
# labeling
library(calibrate)
with(subset(res, -log10(pvalue)>5 & abs(log2FoldChange)>2), textxy(log2FoldChange, -log10(pvalue), labs=rownames(res), cex=0.5))
rownames(res)
# heatmap for DEG
DE_TEi_TEn<-read.csv("DEG_TEi_TEn",row.names = 1)
head(DE_TEi_TEn)
ii_mean<-rowMeans(DE_TEi_TEn[,1:21])
head(ii_mean)
rel_exp<-log2((1+DE_TEi_TEn)/(1+ii_mean))
head(rel_exp)
heatmap.2(as.matrix(rel_exp), trace = "none", col=bluered, #breaks = c(-5,0,5),
Colv=F, dendrogram = "row")
##################################################################################
##################################################################################
# GAGE
library(gage)
data(korg) # available KEGG databases
head(korg,50)
data(bods) # available GO databases
head(bods,50)
cn=colnames(cntNorm_sym)
head(cn)
ICMi=grep("ii",cn,ignore.case =T)
TEi=grep("ti",cn,ignore.case =T)
ICMn=grep("in",cn,ignore.case =T)
TEn=grep("tn",cn,ignore.case =T)
# Symbols2Entrez
library(org.Bt.eg.db)
library(AnnotationFuncs)
genes <- rownames(cntNorm)
head(genes)
#EG <- org.Bt.egSYMBOL2EG[genes]
EG <- translate(genes,org.Bt.egSYMBOL2EG, return.list=T, remove.missing = T,simplify=F)
head(EG)
########################################
# KEGG
# KEGG gene sets for bovine
kg.bta=kegg.gsets("bta")
kegg.gs=kg.bta$kg.sets[kg.bta$sigmet.idx]
kegg.p<-gage(cntNorm_ent, gsets=kegg.gs, ref=TEn, samp=ICMn, same.dir = T, compare='unpaired')
#head(kegg.p$greater[,c(3,4)])
#head(kegg.p$less[,c(3,4)])
kegg.esg.up <- esset.grp(kegg.p$greater,cntNorm_ent,compare='unpaired',
gsets = kegg.gs, ref = TEn, samp = ICMn,
test4up = TRUE, output = TRUE,
outname = "kegg.in_tn.up", make.plot = F)
s
kegg.esg.down <- esset.grp(kegg.p$less,cntNorm_ent,compare='unpaired',
gsets = kegg.gs, ref = TEn, samp = ICMn,
test4up = TRUE, output = TRUE,
outname = "kegg.in_tn.down", make.plot = F)
write.table(kegg.p$greater[,c(3,4)],"gage_KEGG_ii_ti_ti.txt")
write.table(kegg.p$less[,c(3,4)],"gage_KEGG_ii_ti_ii.txt")
#kegg.sig<-sigGeneSet(kegg.p, outname = "kegg.sig")
# PATHVIEW
library(pathview)
head(cntNorm_ent[,43:57])
ii.idx=1:21
ti.idx=22:42
in.idx=43:57
tn.idx=58:72
cnts.d = log2(rowMeans(cntNorm_ent[, ti.idx]))-log2(rowMeans(cntNorm_ent[, tn.idx]))
sel <- kegg.p$greater[, "p.val"] < 0.05 & !is.na(kegg.p$greater[,"p.val"])
path.ids <- rownames(kegg.p$greater)[sel]
sel.l <- kegg.p$less[, "p.val"] < 0.05 & !is.na(kegg.p$less[,"p.val"])
path.ids.l <- rownames(kegg.p$less)[sel.l]
path.ids2 <- substr(c(path.ids, path.ids.l), 1, 8)
pv.out.list <- sapply(path.ids2, function(pid) pathview(gene.data = cnts.d, pathway.id = pid, species = "bta", kegg.native = T))
########################################
# Gene Ontology
## GO gene sets for bovine
go.bta=go.gsets(species="Bovine")
go.bp=go.bta$go.sets[go.bta$go.subs$BP]
go.cc=go.bta$go.sets[go.bta$go.subs$CC]
go.mf=go.bta$go.sets[go.bta$go.subs$MF]
go.p<-gage(cntNorm_ent, gsets=go.bp, ref=ICMi, samp=ICMn, same.dir = T, compare='unpaired')
head(go.p$greater[,1:5])
head(go.p$less[,1:5])
write.table(go.p,"gage_GO_BP_ii_in.txt")
go.sig<-sigGeneSet(go.p, outname = "go.sig")
########################################
# MSigDB
# MSigDB gene sets - GO_BP_symbol
filename=("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq/GO_CC.gmt")
msig.gs=readList(filename)
msig.gs[1:3]
msig.p <- gage(cntNorm_sym, gsets=msig.gs, ref=TEi, samp=TEn, same.dir = T, compare='unpaired')
head(msig.p$greater[,c(3,4)])
head(msig.p$less[,c(3,4)])
write.table(msig.p$greater,"gage_MSig_CC_ti_tn_tn.txt")
write.table(msig.p$less,"gage_MSig_CC_ti_tn_ti.txt")
#msig.sig<-sigGeneSet(msig.p, outiame = "msig_ii_ti")
################################################################################
################################################################################
# heatmap for Stemness/TE genes
pl_cntNorm<-read.csv("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq/cntNorm_Pluri_genes.txt",row.names=1,header=F)
pl_cntNorm<-read.csv("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq/cntNorm_TE_genes.txt",row.names=1,header=F)
pl<-as.matrix(pl_cntNorm)
colnames(pl)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7",
"ii8","ii9","ii10","ii11","ii12","ii13","ii14",
"ii15","ii16","ii17","ii18","ii19","ii20","ii21",
"ti1","ti2","ti3","ti4","ti5","ti6","ti7",
"ti8","ti9","ti10","ti11","ti12","ti13","ti14",
"ti15","ti16","ti17","ti18","ti19","ti20","ti21",
"in1","in2","in3","in4","in5","in6","in7",
"in8","in9","in10","in11","in12","in13","in14","in15",
"tn1","tn2","tn3","tn4","tn5","tn6","tn7",
"tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15")
ii.idx=1:21
ti.idx=22:42
in.idx=43:57
tn.idx=57:72
rel.exp = log2(pl[, 1:72]+1)-log2(rowMeans(pl[, ti.idx])+1)
#rel.exp = log2(c(pl[, 22:42],pl[, 57:72])+1)-log2(rowMeans(pl[, ii.idx])+1)
head(rel.exp)
my_palette <- colorRampPalette(c("steelblue", "lightyellow", "red"))
breaks = unique(c(seq(-1,-0.3,length=75),
seq(-0.3,0.3,length=75),
seq(0.3,1,length=75)))
heatmap.2(rel.exp, col=my_palette, breaks=breaks,
main="All rel to TEi (log2)", cexRow = .4,
Rowv = T, Colv = T, dendrogram = "col", density="none", trace="none",
ColSideColors=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15)))
#RowSideColors=c(rep("green3",95),rep("brown1",27)))
colnames(pl) <- conds
groups <- colnames(pl)
head(groups)
levs<- levels(groups)
group.col <-c("cyan","darkblue","hotpink","darkred")
log.pl<-log(pl+1)
t.log.pl<-t(log.pl)
pr<-prcomp(t.log.pl)
head(pr)
means.pl = rbind(rowMeans(pl[,ii.idx]),rowMeans(pl[,ti.idx]),rowMeans(pl[,in.idx]),rowMeans(pl[,tn.idx]))
head(means.pl)
rownames(means.pl) <- c("ii","ti","in","tn")
groups <- rownames(means.pl)
head(groups)
levs<- levels(groups)
group.col <-c("cyan","darkblue","hotpink","darkred")
log.pl<-log(means.pl+1)
log.pl<-means.pl
t.log.pl<-t(log.pl)
pr<-prcomp(log.pl)
head(pr)
plot(pr,type="lines")
biplot(pr,cex=0.5)
library(rgl)
plot3d(pr$x[,1:3], size=1, type="s",box=F,
xlab = "",ylab = "",zlab = "",
col=c("cyan","darkblue","hotpink","darkred"))
# col=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15)))
rgl.bbox(color=c("#696969","black"), emission="#696969",
specular="#000000", shininess=1, alpha=0.2, nticks = 3 )
aspect3d(1,1,1)
rgl.postscript("3d_pca3_stemness.pdf","pdf")
# Endoplasmic Reticulum genes
cntNorm_ER <- read.csv("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq/cntNorm_ER.csv",row.names = 1)
head(cntNorm_ER)
colnames(cntNorm_ER)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7",
"ii8","ii9","ii10","ii11","ii12","ii13","ii14",
"ii15","ii16","ii17","ii18","ii19","ii20","ii21",
"ti1","ti2","ti3","ti4","ti5","ti6","ti7",
"ti8","ti9","ti10","ti11","ti12","ti13","ti14",
"ti15","ti16","ti17","ti18","ti19","ti20","ti21",
"in1","in2","in3","in4","in5","in6","in7",
"in8","in9","in10","in11","in12","in13","in14","in15",
"tn1","tn2","tn3","tn4","tn5","tn6","tn7",
"tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15")
ii.idx=1:21
ti.idx=22:42
in.idx=43:57
tn.idx=58:72
cn_er<- as.matrix(cntNorm_ER)
rel = log2(1+cn_er[, c(ii.idx,in.idx)])-log2(rowMeans(1+cn_er[, ti.idx]))
rel = log2(1+cn_er[, 1:72])-log2(rowMeans(1+cn_er[, ii.idx]))
my_palette <- colorRampPalette(c("blue", "black", "yellow"))
breaks = unique(c(seq(-4,-2,length=75),
seq(-2,2,length=75),
seq(2,4,length=75)))
heatmap.2(rel,trace="none",col=my_palette,density="none",breaks = breaks,
main="ER",cexRow = 1,
#cellnote = dta.cor,notecol="black",notecex=3,
Rowv = T,Colv = T, dendrogram="both", hclustfun = function(x) hclust(x,method = 'complete'),
key.title="Pearson",key.xlab = NULL,key.ylab = NULL,
ColSideColors=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15)))
sampleTree = hclust(dist(rel), method="complete")
plot(sampleTree,main="",sub = "",cex=.5,cex.axis=1,cex.main=1)
abline(h=30,col="red")
library(WGCNA)
clust=cutree(sampleTree,h=30)
clust=cutreeStatic(sampleTree, cutHeight = 30, minSize = 1)
table(clust)
keepSamples=(clust==13)
rel.select=rel[keepSamples,]
head(rel.select)
heatmap.2(rel.select,trace="none",col=my_palette,density="none",breaks = breaks,
main="ER",cexRow = 1,margins=c(5,10),
#cellnote = dta.cor,notecol="black",notecex=3,
Rowv = T,Colv = T, dendrogram="both", hclustfun = function(x) hclust(x,method = 'complete'),
key.title="Pearson",key.xlab = NULL,key.ylab = NULL,
ColSideColors=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment