Last active
September 1, 2016 10:14
-
-
Save mbk0asis/12fc787cf52bcd2a0e39e8f48f2dbc9e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#source("http://bioconductor.org/biocLite.R") | |
#biocLite(c("DESeq2","gplots","pcaExplorer","bovine.db")) | |
#biocLite("calibrate") | |
#biocLite("AnnotationFuncs") | |
library(DESeq2) | |
#library(pcaExplorer) | |
#library(bovine.db) | |
library(ggplot2) | |
library(gplots) | |
library(ggrepel) | |
library(gage) | |
setwd("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq") | |
wd<-"/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq" | |
cnts_symbol=read.table("Total.counts",row.names = 1, header=T) | |
cnts_entrez=read.csv("Total.counts.entrez",row.names = 1, header=F) | |
colnames(cnts_symbol)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7","ii8","ii9","ii10","ii11","ii12","ii13","ii14","ii15","ii16","ii17","ii18","ii19","ii20","ii21", | |
"ti1","ti2","ti3","ti4","ti5","ti6","ti7","ti8","ti9","ti10","ti11","ti12","ti13","ti14","ti15","ti16","ti17","ti18","ti19","ti20","ti21", | |
"in1","in2","in3","in4","in5","in6","in7","in8","in9","in10","in11","in12","in13","in14","in15", | |
"tn1","tn2","tn3","tn4","tn5","tn6","tn7","tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15") | |
head(cnts_entrez) | |
# "Total.counts" contains 6 rows of statistical summary of each column | |
# if haven't remove them, run following | |
delRow <- nrow(cnts_symbol)-6 | |
cnts_symbol <- head(cnts_symbol,delRow) | |
#cnts_symbol <- cnts_symbol[,1:57] | |
# To remove all zero rows | |
df<-as.data.frame(cnts_symbol) | |
cnts_symbol_noZero <- df[rowMeans(abs(df[-c(1:2)]))>0,] | |
#cnts_symbol_noZero<-cnts_symbol_noZero[,1:24] | |
dim(cnts_symbol_noZero) | |
head(cnts_symbol_noZero) | |
conds <- c(rep("ICMi",21) | |
,rep("TEi",21) | |
,rep("ICMn",15) | |
,rep("TEn",15)) | |
coldat=DataFrame(conds=factor(conds)) | |
coldat | |
dds <- DESeqDataSetFromMatrix(cnts_symbol, colData=coldat, design = ~ conds) | |
dds <- DESeq(dds) | |
dds | |
# to obtain normalized count data | |
cntNorm_sym <- counts(dds, normalized=T) | |
dim(cntNorm_sym) | |
cntNorm_ent <- counts(dds, normalized=T) | |
head(cntNorm_ent) | |
boxplot(log2(cntNorm_sym+1), notch=F, boxonly=F, las=2, cex=0.2, | |
col = c(rep("red",21),rep("palevioletred1",21), | |
rep("blue2",15), | |
rep("royalblue2",15),rep("sienna",5)) | |
,outline = F,#ylim=c(0,100), | |
ylab = "log2 counts", main = "raw cnts") | |
# PCA plot | |
rld <- rlogTransformation(dds) | |
pcaplot(rld,intgroup=c("conds"), ntop=10000, text_labels=F, title="PCA", | |
pcX=1,pcY=2,point_size=3) | |
vsd <- varianceStabilizingTransformation(dds) | |
pcaplot(vsd,intgroup=c("conds"), ntop=20000, text_labels=F, title="PCA", | |
pcX=1,pcY=2,point_size=3) | |
# 3D PCA | |
df<-as.data.frame(cntNorm_sym) | |
log.pl<-log(df+1) | |
t.log.pl<-t(log.pl) | |
pr<-prcomp(t.log.pl) # PCA | |
plot(pr,type="lines") | |
#biplot(pr,cex=0.7) | |
library(rgl) | |
colnames(df) <- c(rep("ii",21),rep("ti",21),rep("in",15),rep("tn",15)) | |
groups <- rownames(t(df)) | |
levs<- levels(groups) | |
group.col <-c("cyan","darkblue","hotpink","darkred") | |
plot3d(pr$x[,1:3], size=2, type="s",box=F, | |
xlab = "",ylab = "",zlab = "",#col=rainbow(72)) | |
col=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15))) | |
rgl.bbox(color=c("#3333FF","black"), emission="#3333FF", | |
specular="#3333FF", shininess=1, alpha=0.8, nticks = 3 ) | |
aspect3d(1,1,1) | |
writeWebGL(dir=wd, "3d_PCA_cntNorm.html") | |
rgl.postscript("3d_pca3.pdf","pdf") | |
# | |
# Correlation Analysis | |
chrX_genes<-read.csv("X_genes", row.names=1, header=F) | |
colnames(chrX_genes)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7", | |
"ii8","ii9","ii10","ii11","ii12","ii13","ii14", | |
"ii15","ii16","ii17","ii18","ii19","ii20","ii21", | |
"ti1","ti2","ti3","ti4","ti5","ti6","ti7", | |
"ti8","ti9","ti10","ti11","ti12","ti13","ti14", | |
"ti15","ti16","ti17","ti18","ti19","ti20","ti21", | |
"in1","in2","in3","in4","in5","in6","in7", | |
"in8","in9","in10","in11","in12","in13","in14","in15", | |
"tn1","tn2","tn3","tn4","tn5","tn6","tn7", | |
"tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15") | |
#chrX_genes<-chrX_genes[,c(22:42,58:72)] | |
head(chrX_genes) | |
Auto_genes<-read.csv("Auto_genes",row.names = 1) | |
colnames(Auto_genes)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7", | |
"ii8","ii9","ii10","ii11","ii12","ii13","ii14", | |
"ii15","ii16","ii17","ii18","ii19","ii20","ii21", | |
"ti1","ti2","ti3","ti4","ti5","ti6","ti7", | |
"ti8","ti9","ti10","ti11","ti12","ti13","ti14", | |
"ti15","ti16","ti17","ti18","ti19","ti20","ti21", | |
"in1","in2","in3","in4","in5","in6","in7", | |
"in8","in9","in10","in11","in12","in13","in14","in15", | |
"tn1","tn2","tn3","tn4","tn5","tn6","tn7", | |
"tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15") | |
head(Auto_genes) | |
#Auto_genes<-Auto_genes[,c(22:42,58:72)] | |
# to calculate group mean | |
cn<- as.data.frame(chrX_genes) | |
vars<-c("ii","ti","in","tn") | |
grp_mean = sapply(vars,function(x) rowMeans(cn[,grep(x,names(cn))])) | |
head(grp_mean) | |
dta.cor <- round(cor(grp_mean,method="pearson"),2) | |
head(dta.cor) | |
heatmap.2(dta.cor,trace="none",col=bluered(256),density="none", | |
cellnote = dta.cor,notecol="black",notecex=3, | |
Rowv = F,Colv = F, dendrogram="none", | |
key.title="Pearson",key.xlab = NULL,key.ylab = NULL) | |
#, | |
RowSideColors=c(rep("cyan",21), | |
rep("darkblue",21), | |
rep("hotpink",15), | |
rep("darkred",15)), | |
ColSideColors=c(rep("cyan",21), | |
rep("darkblue",21), | |
rep("hotpink",15), | |
rep("darkred",15))) | |
# | |
hc<-hclust(dist(dta.cor)) | |
plot(hc,hang=-1,main="Pearson") | |
# DE analysis | |
res <- results(dds, contrast = c("conds","ICMn","TEn")) | |
res_sig <- subset(res, padj<.01 & abs(log2FoldChange)>1) | |
summary(res_sig) | |
write.table(res_sig,"DEG_ti_tn") | |
# volcano plot | |
#res$padj[-log10(res$padj) > 10] <- 10 # outlier cutoff | |
#res$log2FoldChange[abs(res$log2FoldChange) > 5] <- 5 # outlier cutoff | |
with(res, plot(log2FoldChange,-log10(padj),pch=19,cex=0.5,ylim=c(-1,20),main="ICMn vs TEn",col="lightgrey")) | |
with(subset(res, padj<0.01 & abs(log2FoldChange)>1),points(log2FoldChange,-log10(padj),pch=20,cex=0.5,col="red")) | |
abline(v=c(-1,1),col="blue",lty=2) | |
abline(v=0,col="blue") | |
abline(h=(-log10(0.01)),col="blue",lty=2) | |
# labeling | |
library(calibrate) | |
with(subset(res, -log10(pvalue)>5 & abs(log2FoldChange)>2), textxy(log2FoldChange, -log10(pvalue), labs=rownames(res), cex=0.5)) | |
rownames(res) | |
# heatmap for DEG | |
DE_TEi_TEn<-read.csv("DEG_TEi_TEn",row.names = 1) | |
head(DE_TEi_TEn) | |
ii_mean<-rowMeans(DE_TEi_TEn[,1:21]) | |
head(ii_mean) | |
rel_exp<-log2((1+DE_TEi_TEn)/(1+ii_mean)) | |
head(rel_exp) | |
heatmap.2(as.matrix(rel_exp), trace = "none", col=bluered, #breaks = c(-5,0,5), | |
Colv=F, dendrogram = "row") | |
################################################################################## | |
################################################################################## | |
# GAGE | |
library(gage) | |
data(korg) # available KEGG databases | |
head(korg,50) | |
data(bods) # available GO databases | |
head(bods,50) | |
cn=colnames(cntNorm_sym) | |
head(cn) | |
ICMi=grep("ii",cn,ignore.case =T) | |
TEi=grep("ti",cn,ignore.case =T) | |
ICMn=grep("in",cn,ignore.case =T) | |
TEn=grep("tn",cn,ignore.case =T) | |
# Symbols2Entrez | |
library(org.Bt.eg.db) | |
library(AnnotationFuncs) | |
genes <- rownames(cntNorm) | |
head(genes) | |
#EG <- org.Bt.egSYMBOL2EG[genes] | |
EG <- translate(genes,org.Bt.egSYMBOL2EG, return.list=T, remove.missing = T,simplify=F) | |
head(EG) | |
######################################## | |
# KEGG | |
# KEGG gene sets for bovine | |
kg.bta=kegg.gsets("bta") | |
kegg.gs=kg.bta$kg.sets[kg.bta$sigmet.idx] | |
kegg.p<-gage(cntNorm_ent, gsets=kegg.gs, ref=TEn, samp=ICMn, same.dir = T, compare='unpaired') | |
#head(kegg.p$greater[,c(3,4)]) | |
#head(kegg.p$less[,c(3,4)]) | |
kegg.esg.up <- esset.grp(kegg.p$greater,cntNorm_ent,compare='unpaired', | |
gsets = kegg.gs, ref = TEn, samp = ICMn, | |
test4up = TRUE, output = TRUE, | |
outname = "kegg.in_tn.up", make.plot = F) | |
s | |
kegg.esg.down <- esset.grp(kegg.p$less,cntNorm_ent,compare='unpaired', | |
gsets = kegg.gs, ref = TEn, samp = ICMn, | |
test4up = TRUE, output = TRUE, | |
outname = "kegg.in_tn.down", make.plot = F) | |
write.table(kegg.p$greater[,c(3,4)],"gage_KEGG_ii_ti_ti.txt") | |
write.table(kegg.p$less[,c(3,4)],"gage_KEGG_ii_ti_ii.txt") | |
#kegg.sig<-sigGeneSet(kegg.p, outname = "kegg.sig") | |
# PATHVIEW | |
library(pathview) | |
head(cntNorm_ent[,43:57]) | |
ii.idx=1:21 | |
ti.idx=22:42 | |
in.idx=43:57 | |
tn.idx=58:72 | |
cnts.d = log2(rowMeans(cntNorm_ent[, ti.idx]))-log2(rowMeans(cntNorm_ent[, tn.idx])) | |
sel <- kegg.p$greater[, "p.val"] < 0.05 & !is.na(kegg.p$greater[,"p.val"]) | |
path.ids <- rownames(kegg.p$greater)[sel] | |
sel.l <- kegg.p$less[, "p.val"] < 0.05 & !is.na(kegg.p$less[,"p.val"]) | |
path.ids.l <- rownames(kegg.p$less)[sel.l] | |
path.ids2 <- substr(c(path.ids, path.ids.l), 1, 8) | |
pv.out.list <- sapply(path.ids2, function(pid) pathview(gene.data = cnts.d, pathway.id = pid, species = "bta", kegg.native = T)) | |
######################################## | |
# Gene Ontology | |
## GO gene sets for bovine | |
go.bta=go.gsets(species="Bovine") | |
go.bp=go.bta$go.sets[go.bta$go.subs$BP] | |
go.cc=go.bta$go.sets[go.bta$go.subs$CC] | |
go.mf=go.bta$go.sets[go.bta$go.subs$MF] | |
go.p<-gage(cntNorm_ent, gsets=go.bp, ref=ICMi, samp=ICMn, same.dir = T, compare='unpaired') | |
head(go.p$greater[,1:5]) | |
head(go.p$less[,1:5]) | |
write.table(go.p,"gage_GO_BP_ii_in.txt") | |
go.sig<-sigGeneSet(go.p, outname = "go.sig") | |
######################################## | |
# MSigDB | |
# MSigDB gene sets - GO_BP_symbol | |
filename=("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq/GO_CC.gmt") | |
msig.gs=readList(filename) | |
msig.gs[1:3] | |
msig.p <- gage(cntNorm_sym, gsets=msig.gs, ref=TEi, samp=TEn, same.dir = T, compare='unpaired') | |
head(msig.p$greater[,c(3,4)]) | |
head(msig.p$less[,c(3,4)]) | |
write.table(msig.p$greater,"gage_MSig_CC_ti_tn_tn.txt") | |
write.table(msig.p$less,"gage_MSig_CC_ti_tn_ti.txt") | |
#msig.sig<-sigGeneSet(msig.p, outiame = "msig_ii_ti") | |
################################################################################ | |
################################################################################ | |
# heatmap for Stemness/TE genes | |
pl_cntNorm<-read.csv("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq/cntNorm_Pluri_genes.txt",row.names=1,header=F) | |
pl_cntNorm<-read.csv("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq/cntNorm_TE_genes.txt",row.names=1,header=F) | |
pl<-as.matrix(pl_cntNorm) | |
colnames(pl)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7", | |
"ii8","ii9","ii10","ii11","ii12","ii13","ii14", | |
"ii15","ii16","ii17","ii18","ii19","ii20","ii21", | |
"ti1","ti2","ti3","ti4","ti5","ti6","ti7", | |
"ti8","ti9","ti10","ti11","ti12","ti13","ti14", | |
"ti15","ti16","ti17","ti18","ti19","ti20","ti21", | |
"in1","in2","in3","in4","in5","in6","in7", | |
"in8","in9","in10","in11","in12","in13","in14","in15", | |
"tn1","tn2","tn3","tn4","tn5","tn6","tn7", | |
"tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15") | |
ii.idx=1:21 | |
ti.idx=22:42 | |
in.idx=43:57 | |
tn.idx=57:72 | |
rel.exp = log2(pl[, 1:72]+1)-log2(rowMeans(pl[, ti.idx])+1) | |
#rel.exp = log2(c(pl[, 22:42],pl[, 57:72])+1)-log2(rowMeans(pl[, ii.idx])+1) | |
head(rel.exp) | |
my_palette <- colorRampPalette(c("steelblue", "lightyellow", "red")) | |
breaks = unique(c(seq(-1,-0.3,length=75), | |
seq(-0.3,0.3,length=75), | |
seq(0.3,1,length=75))) | |
heatmap.2(rel.exp, col=my_palette, breaks=breaks, | |
main="All rel to TEi (log2)", cexRow = .4, | |
Rowv = T, Colv = T, dendrogram = "col", density="none", trace="none", | |
ColSideColors=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15))) | |
#RowSideColors=c(rep("green3",95),rep("brown1",27))) | |
colnames(pl) <- conds | |
groups <- colnames(pl) | |
head(groups) | |
levs<- levels(groups) | |
group.col <-c("cyan","darkblue","hotpink","darkred") | |
log.pl<-log(pl+1) | |
t.log.pl<-t(log.pl) | |
pr<-prcomp(t.log.pl) | |
head(pr) | |
means.pl = rbind(rowMeans(pl[,ii.idx]),rowMeans(pl[,ti.idx]),rowMeans(pl[,in.idx]),rowMeans(pl[,tn.idx])) | |
head(means.pl) | |
rownames(means.pl) <- c("ii","ti","in","tn") | |
groups <- rownames(means.pl) | |
head(groups) | |
levs<- levels(groups) | |
group.col <-c("cyan","darkblue","hotpink","darkred") | |
log.pl<-log(means.pl+1) | |
log.pl<-means.pl | |
t.log.pl<-t(log.pl) | |
pr<-prcomp(log.pl) | |
head(pr) | |
plot(pr,type="lines") | |
biplot(pr,cex=0.5) | |
library(rgl) | |
plot3d(pr$x[,1:3], size=1, type="s",box=F, | |
xlab = "",ylab = "",zlab = "", | |
col=c("cyan","darkblue","hotpink","darkred")) | |
# col=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15))) | |
rgl.bbox(color=c("#696969","black"), emission="#696969", | |
specular="#000000", shininess=1, alpha=0.2, nticks = 3 ) | |
aspect3d(1,1,1) | |
rgl.postscript("3d_pca3_stemness.pdf","pdf") | |
# Endoplasmic Reticulum genes | |
cntNorm_ER <- read.csv("/home/bio1/00-NGS/RNAseq/single_ICM_TE/DESeq/cntNorm_ER.csv",row.names = 1) | |
head(cntNorm_ER) | |
colnames(cntNorm_ER)=c("ii1","ii2","ii3","ii4","ii5","ii6","ii7", | |
"ii8","ii9","ii10","ii11","ii12","ii13","ii14", | |
"ii15","ii16","ii17","ii18","ii19","ii20","ii21", | |
"ti1","ti2","ti3","ti4","ti5","ti6","ti7", | |
"ti8","ti9","ti10","ti11","ti12","ti13","ti14", | |
"ti15","ti16","ti17","ti18","ti19","ti20","ti21", | |
"in1","in2","in3","in4","in5","in6","in7", | |
"in8","in9","in10","in11","in12","in13","in14","in15", | |
"tn1","tn2","tn3","tn4","tn5","tn6","tn7", | |
"tn8","tn9","tn10","tn11","tn12","tn13","tn14","tn15") | |
ii.idx=1:21 | |
ti.idx=22:42 | |
in.idx=43:57 | |
tn.idx=58:72 | |
cn_er<- as.matrix(cntNorm_ER) | |
rel = log2(1+cn_er[, c(ii.idx,in.idx)])-log2(rowMeans(1+cn_er[, ti.idx])) | |
rel = log2(1+cn_er[, 1:72])-log2(rowMeans(1+cn_er[, ii.idx])) | |
my_palette <- colorRampPalette(c("blue", "black", "yellow")) | |
breaks = unique(c(seq(-4,-2,length=75), | |
seq(-2,2,length=75), | |
seq(2,4,length=75))) | |
heatmap.2(rel,trace="none",col=my_palette,density="none",breaks = breaks, | |
main="ER",cexRow = 1, | |
#cellnote = dta.cor,notecol="black",notecex=3, | |
Rowv = T,Colv = T, dendrogram="both", hclustfun = function(x) hclust(x,method = 'complete'), | |
key.title="Pearson",key.xlab = NULL,key.ylab = NULL, | |
ColSideColors=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15))) | |
sampleTree = hclust(dist(rel), method="complete") | |
plot(sampleTree,main="",sub = "",cex=.5,cex.axis=1,cex.main=1) | |
abline(h=30,col="red") | |
library(WGCNA) | |
clust=cutree(sampleTree,h=30) | |
clust=cutreeStatic(sampleTree, cutHeight = 30, minSize = 1) | |
table(clust) | |
keepSamples=(clust==13) | |
rel.select=rel[keepSamples,] | |
head(rel.select) | |
heatmap.2(rel.select,trace="none",col=my_palette,density="none",breaks = breaks, | |
main="ER",cexRow = 1,margins=c(5,10), | |
#cellnote = dta.cor,notecol="black",notecex=3, | |
Rowv = T,Colv = T, dendrogram="both", hclustfun = function(x) hclust(x,method = 'complete'), | |
key.title="Pearson",key.xlab = NULL,key.ylab = NULL, | |
ColSideColors=c(rep("cyan",21),rep("darkblue",21),rep("hotpink",15),rep("darkred",15))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment