Created
August 30, 2019 05:09
-
-
Save mbk0asis/aaee2021f2a58b609a8ee86094206f75 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BiocUpgrade() | |
source("http://bioconductor.org/biocLite.R") | |
biocLite(c("DESeq2","gplots","pcaExplorer","bovine.db","calibrate","AnnotationFuncs","gage","Rtsne","ggrepel")) | |
#library(TCGAbiolinks) | |
library(DESeq2) | |
library(pcaExplorer) | |
library(ggfortify) | |
library(ggplot2) | |
library(gplots) | |
library(ggrepel) | |
library(gage) | |
library(Rtsne) | |
library(reshape2) | |
library(tidyverse) | |
library(org.Hs.eg.db) | |
wdir<-"~/BIO0/00-NGS/SETDB1_TCGA/" | |
setwd(wdir) | |
luad_tumor <- read.csv("~/BIO0/00-NGS/SETDB1_TCGA/SETDB1_Top100_Bot100_LUAD/exp.LUAD.TUMOR.SETDB1.Top100.Bot100.htseq-count.csv", header=T, row.names = 1) | |
luad_normal <- read.csv("~/BIO0/00-NGS/SETDB1_TCGA/SETDB1_Top100_Bot100_LUAD/exp.LUAD.NORMAL.htseq-counts.csv", header=T, row.names = 1) | |
lusc_tumor <- read.csv("~/BIO0/00-NGS/SETDB1_TCGA/SETDB1_Top100_Bot100_LUSC/exp.LUSC.TUMOR.Setdb1.Top100.Bot100.htseq-count.csv", header=T, row.names = 1) | |
lusc_normal <- read.csv("~/BIO0/00-NGS/SETDB1_TCGA/SETDB1_Top100_Bot100_LUSC/exp.LUSC.NORMAL.htseq-counts.csv", header=T, row.names = 1) | |
nLUAD_Tumor <- ncol(luad_tumor) | |
nLUSC_Tumor <- ncol(lusc_tumor) | |
nLUAD_Normal <- ncol(luad_normal) | |
nLUSC_Normal <- ncol(lusc_normal) | |
Normal <- nLUAD_Normal + nLUSC_Normal | |
cnts <- data.frame(luad_tumor,lusc_tumor,luad_normal,lusc_normal) | |
head(cnts[,1:5]) | |
dim(cnts) | |
# To remove all zero rows | |
cnts_noZero <- cnts[rowMeans(abs(cnts))>0,] | |
dim(cnts_noZero) | |
conds <- c(rep("luad_top100",100),rep("luad_bot100",100),rep("lusc_top100",100),rep("lusc_bot100",100), | |
rep("luad_normal",nLUAD_Normal),rep("lusc_normal",nLUSC_Normal)) | |
conds <- c(rep("luad_top100",100),rep("luad_bot100",100), | |
rep("lusc_top100",100),rep("lusc_bot100",100), | |
rep("normal",108)) | |
coldat=DataFrame(conds=factor(conds)) | |
coldat | |
dds <- DESeqDataSetFromMatrix(cnts_noZero, colData=coldat, design = ~ conds) | |
dds <- DESeq(dds, fitType = "mean") | |
dds | |
# to obtain normalized count data | |
cntNorm <- counts(dds, normalized=T) | |
#write.csv(cntNorm,"cntNorm.LUSC.Setdb1.Top100.Bot100.csv") | |
############################################################################################################ | |
# to select Setdb1 expression Top100/Bot100 | |
############################################## | |
# ggplot | |
setdb1 <- "ENSG00000143379" | |
exp.setdb1 <- cntNorm[grep(setdb1,rownames(cntNorm)),] | |
names(exp.setdb1) | |
type1 <- c(rep("luad_tumor",nLUAD_Tumor), | |
rep("lusc_tumor",nLUSC_Tumor), | |
rep("luad_normal",nLUAD_Normal), | |
rep("lusc_normal",nLUSC_Normal)) | |
type2 <- c(rep("luad_tumor",nLUAD_Tumor), | |
rep("lusc_tumor",nLUSC_Tumor), | |
rep("normal",nLUAD_Normal + nLUSC_Normal)) | |
m.exp.setdb1 <- melt(exp.setdb1) | |
m.exp.setdb1$type1 <- type1 | |
m.exp.setdb1$type2 <- type2 | |
m.exp.setdb1$type1 <- factor(m.exp.setdb1$type1, levels = c("luad_tumor","lusc_tumor","luad_normal","lusc_normal")) | |
m.exp.setdb1.sorted <- m.exp.setdb1[order(type1,-m.exp.setdb1$value),] | |
aggregate(value ~ type1, data=m.exp.setdb1.sorted, FUN = function(x){NROW(x)}) | |
ggplot(m.exp.setdb1.sorted, aes(x=type1, y=log2(value))) + | |
theme_bw() + | |
geom_jitter(width=.1, alpha=.8, aes(color=type1)) + | |
#geom_violin(aes(fill=type2), alpha=.5) + | |
geom_boxplot(width=.4, alpha=.5, outlier.shape = NA) + | |
scale_color_brewer(palette="Set1") | |
############################################### | |
# Generic boxplot | |
nSamples=100 # number of samples to be subsetted | |
nCol <- ncol(cntNorm) | |
setdb1 <- "ENSG00000143379" | |
exp.setdb1 <- cntNorm_Tumor[grep(setdb1,rownames(cntNorm)),] | |
exp.setdb1.Tumor.sorted <- data.frame(exp.setdb1.Tumor[order(exp.setdb1.Tumor)],exp.setdb1.Tumor[order(exp.setdb1.Tumor)]) | |
colnames(exp.setdb1.Tumor.sorted) <- rep("SETDB1",2) | |
dim(exp.setdb1.Tumor.sorted) | |
head(exp.setdb1.Tumor.sorted) | |
# LUAD_Tumor | |
meanHIGH = log2(colMeans(exp.setdb1.Tumor.sorted[topStart:nTumor,])) | |
meanLOW = log2(colMeans(exp.setdb1.Tumor.sorted[1:nSamples,])) | |
high <- tail(exp.setdb1.Tumor.sorted,nSamples) | |
nHigh <- nrow(high) | |
low <- head(exp.setdb1.Tumor.sorted,nSamples) | |
nLow <- nrow(low) | |
log2FC_LUAD_Tumor <- round(meanHIGH[1]-meanLOW[1],2) | |
# LUSC_Tumor | |
cntNorm_Tumor <- cntNorm[,1:nTumor] | |
dim(cntNorm_Tumor) | |
exp.setdb1.Tumor <- cntNorm_Tumor[grep(setdb1,rownames(cntNorm_Tumor)),] | |
exp.setdb1.Tumor.sorted <- data.frame(exp.setdb1.Tumor[order(exp.setdb1.Tumor)],exp.setdb1.Tumor[order(exp.setdb1.Tumor)]) | |
colnames(exp.setdb1.Tumor.sorted) <- rep("SETDB1",2) | |
dim(exp.setdb1.Tumor.sorted) | |
head(exp.setdb1.Tumor.sorted) | |
meanHIGH = log2(colMeans(exp.setdb1.Tumor.sorted[topStart:nTumor,])) | |
meanLOW = log2(colMeans(exp.setdb1.Tumor.sorted[1:nSamples,])) | |
high <- tail(exp.setdb1.Tumor.sorted,nSamples) | |
nHigh_lusc_tumor <- nrow(high) | |
low <- head(exp.setdb1.Tumor.sorted,nSamples) | |
nLow_lusc_tumor <- nrow(low) | |
log2FC_LUSC_Tumor <- round(meanHIGH[1]-meanLOW[1],2) | |
nLUAD_Tumor <- ncol(luad_tumor) | |
nLUAD_Normal <- ncol(luad_normal) | |
nLUSC_Tumor <- ncol(lusc_tumor) | |
nLUSC_Normal <- ncol(lusc_normal) | |
# Normal | |
cntNorm_Normal <- cntNorm[,(nTumor+1):nCol] | |
exp.setdb1.Normal <- cntNorm_Normal[grep(setdb1,rownames(cntNorm_Normal)),] | |
exp.setdb1.Normal.sorted <- data.frame(exp.setdb1.Normal[order(exp.setdb1.Normal)],exp.setdb1.Normal[order(exp.setdb1.Normal)]) | |
colnames(exp.setdb1.Normal.sorted) <- rep("SETDB1",2) | |
mean_Normal <- round(log2(colMeans(exp.setdb1.Normal.sorted)),2) | |
# Boxplots | |
par(mfrow=c(1,2)) | |
boxplot(log2(exp.setdb1.Tumor.sorted[,1]),outline=T,pars=list(outcol="white"),col=(rgb(1,1,1,0)), ylim=c(10,14),#axes=F, | |
main=paste("LUSC Tumor\nSETDB1 Top",nSamples,"vs Bot",nSamples,"\nlog2FC = ",log2FC,"(~",round(2^log2FC,2),")"), | |
ylab = "log2(cntNorm)") | |
stripchart(log2(high),vertical=T, method = 'jitter', add=T, | |
pch=20, cex=.9, col=(rgb(.8,.2,.2,.5))) | |
#stripchart(log2(mid),vertical=T, method = 'jitter', add=T, | |
# pch=20, cex=.9, col=(rgb(.8,.8,.8,.7))) | |
stripchart(log2(low),vertical=T, method = 'jitter', add=T, | |
pch=20, cex=.9, col=(rgb(.2,.2,.8,.5))) | |
points(meanHIGH[1],pch=7,lwd=4) | |
text(meanHIGH[1],paste("mean_HIGH\n= ",round(meanHIGH[1],2)),offset=2,pos=4) | |
points(meanLOW[1],pch=7,lwd=4) | |
text(meanLOW[1],paste("mean_LOW\n= ",round(meanLOW[1],2)),offset=2,pos=4) | |
#write.csv(high[,1:2], paste("LUSC.SETDB1.Top",nSamples,".samples",sep="")) | |
#write.csv(low[,1:2], paste("LUSC.SETDB1.Bot",nSamples,".samples",sep="")) | |
boxplot(log2(exp.setdb1.Normal.sorted[,1]),outline=T,bty=F, | |
pars=list(outcol="white"),col=(rgb(1,1,1,0)),ylim=c(10,14), | |
main = "LUSC Normal") | |
stripchart(log2(exp.setdb1.Normal.sorted),vertical=T, method = 'jitter', add=T, | |
pch=20, cex=.9, col=(rgb(.3,.3,.3,.8))) | |
points(mean_Normal,pch=7,lwd=4) | |
text(mean_Normal,paste("mean_Norm\n= ",round(mean_Normal,2)), offset=3, pos=4) | |
#dev.off() | |
# | |
############################################################################################################ | |
# | |
# PCA plot | |
rld <- rlogTransformation(dds,fitType = "mean") | |
#pcaplot(rld,intgroup=c("conds"),ntop=50000, text_labels=F, title="PCA", | |
# pcX=1,pcY=2,point_size=3, ellipse = TRUE) | |
vsd <- varianceStabilizingTransformation(dds, fitType = "mean") | |
X=1 | |
Y=2 | |
ntop=30000 | |
pcaData <- pcaplot(vsd, intgroup=c("conds"), pcX=X, pcY=Y, returnData=TRUE, ntop = ntop) | |
percentVar <- round(100 * attr(pcaData, "percentVar")) | |
pcaData$conds <- factor(pcaData$conds, levels = c('luad_top100','luad_bot100','lusc_top100','lusc_bot100','normal')) | |
ggplot(pcaData, aes(pcaData[,1], pcaData[,2], color=conds)) + theme_bw() + | |
geom_point(size=3, alpha = .7) + #ggtitle("PCA - LUAD & LUSC\nSETDB1 Top100 vs Bot100 vs Normal") + | |
xlab(paste0("PC",X,": ",percentVar[1],"% variance")) + | |
ylab(paste0("PC",Y,": ",percentVar[2],"% variance")) + | |
#coord_fixed() + | |
scale_color_manual(values=c("blue","red","orange","#00BFC4","yellow4","yellow4")) + | |
stat_ellipse(aes(x=pcaData[,1], y=pcaData[,2]),type = "t") | |
######################################################## | |
# t-SNE | |
dta <- assay(vsd) | |
head(dta) | |
dta_sorted <- dta[order(-rowMeans(dta)),] | |
dta_sorted[5,1:5] | |
conds <- c(rep("luad_top100",100),rep("luad_bot100",100), | |
rep("lusc_top100",100),rep("lusc_bot100",100),rep("normal",108)) | |
goi <- data.frame(dta[grepl("ENSG00000143379", rownames(dta)), ]) | |
colnames(goi) <-"goi" # gene of interset | |
mid<-mean(goi) | |
head(goi) | |
d <- dist(t(dta_sorted[1:30000,])) | |
for ( p in seq(10,50,10)){ | |
set.seed(50) # tsne has some stochastic steps (gradient descent) so need to set random | |
perplexity = 30 | |
tsne_out <- Rtsne(d, is_distance=T, perplexity=perplexity, | |
num_threads=20, verbose=TRUE) | |
## | |
dta2 <- data.frame(tsne_out$Y, conds, goi) | |
ggplot(dta2, aes(x=dta2[,1],y=dta2[,2], color=goi)) +#, color=conds)) + | |
theme_bw() + | |
geom_point(alpha=.5, size = 3) + | |
xlab(paste0("Dimension 1")) + ylab(paste0("Dimension 2")) + | |
scale_color_gradient2(midpoint=mid,low="steelblue",mid="white",high="red",space ="Lab") | |
#scale_color_manual(values=c("blue","red","#00BFC4","orange","yellow4")) | |
#stat_ellipse(aes(x=dta2[,1], y=dta2[,2]),type = "t") | |
ggsave(paste0("tSNE_LUAD_LUSC_SETDB1_Top.Bot100_Normal_perplexity_",perplexity,".png"), | |
width = 6, height = 4.5, units = "in") | |
} | |
''' | |
########################################################### | |
# DE analysis | |
Sample <- "top100" | |
Control <- "bot100" | |
mean_top100 <- rowMeans(cntNorm[,1:100]) | |
mean_bot100 <- rowMeans(cntNorm[,101:200]) | |
sd_top100 <- rowSds(cntNorm[,1:100]) | |
sd_bot100 <- rowSds(cntNorm[,101:200]) | |
res <- results(dds, contrast = c("conds",Sample,Control)) | |
res$mean_top100 <- mean_top100 | |
res$sd_top100 <- sd_top100 | |
res$mean_bot100 <- mean_bot100 | |
res$sd_bot100 <- sd_bot100 | |
res$symbol = mapIds(org.Hs.eg.db, | |
keys=row.names(res), | |
column="SYMBOL", | |
keytype="ENSEMBL", | |
multiVals="first") | |
# add entrez ID to res | |
res$entrez = mapIds(org.Hs.eg.db, | |
keys=row.names(res), | |
column="ENTREZID", | |
keytype="ENSEMBL", | |
multiVals="first") | |
head(res) | |
#res2 <- res[,c(2,5:11)] | |
#dim(res2) | |
#write.csv(res2, "Whole_Gene_DE_results.csv") | |
# sig DEGs | |
res_sig <- subset(res, padj<=.00001 & abs(log2FoldChange)>=1) | |
head(res_sig) | |
dim(res_sig) | |
#write.csv(res_sig,"sig_DEGs_LUSC_SETDB1_Top100_Bot100_q0.00001_fc2.csv") | |
# volcano plot | |
nTop100 <- nrow(subset(res, padj<=.00001 & log2FoldChange > 1)) | |
nBot100 <- nrow(subset(res, padj<=.00001 & log2FoldChange < -1)) | |
nTop100 | |
nBot100 | |
res$padj[res$padj <= 1e-20] <- 1e-20 | |
res$log2FoldChange[abs(res$log2FoldChange) > 6] <- 6 # outlier cutoff | |
with(res, plot(log2FoldChange,-log10(padj), | |
pch=19,cex=0.5,main=paste("TCGA LUSC\nSETDB1 (q<0.00001, fc>2)\n Bot100 (",nBot100,") vs Top100 (",nTop100,")",sep=""), | |
col="lightgrey")) | |
with(subset(res, padj<=.00001 & log2FoldChange > 1), | |
points(log2FoldChange,-log10(padj),pch=19,cex=0.7,col=rgb(1,.6,0,.7))) | |
with(subset(res, padj<=.00001 & log2FoldChange < -1), | |
points(log2FoldChange,-log10(padj),pch=19,cex=0.7,col=rgb(.2,.6,.2,.7))) | |
abline(v=c(-1,1),col="blue",lty=2) | |
abline(v=0,col="blue") | |
abline(h=c(-log10(.00001)),col="blue",lty=2) | |
################################################################################## | |
################################################################################## | |
# GAGE | |
library(gage) | |
data(korg) # available KEGG databases | |
head(korg,50) | |
data(bods) # available GO databases | |
head(bods,50) | |
######################################## | |
# KEGG | |
# KEGG gene sets | |
kg.bta=kegg.gsets("hsa") | |
kegg.gs=kg.bta$kg.sets[kg.bta$sigmet.idx] | |
# to export KEGG gene sets | |
#kegg.gs.table <- plyr::ldply(kegg.gs, rbind) | |
#write.csv(kegg.gs.table,"kegg.gs.table.csv") | |
# | |
foldchanges = res$log2FoldChange | |
names(foldchanges) = res_sig$entrez | |
kegg.p<-gage(foldchanges, gsets=kegg.gs, same.dir=T, compare='unpaired') | |
head(kegg.p$greater[,c(3,4)]) | |
head(kegg.p$less[,c(3,4)]) | |
write.table(kegg.p$greater[,c(3,4)],"gage_KEGG_SETDB1_Top100.txt") | |
write.table(kegg.p$less[,c(3,4)],"gage_KEGG_SETDB1_Bot100.txt") | |
# PATHVIEW | |
library(pathview) | |
cnts.d = log2(rowMeans(cntNorm_ent[, ft.idx]))-log2(rowMeans(cntNorm_ent[, fi.idx])) | |
sel <- kegg.p$greater[, "p.val"] < 0.05 & !is.na(kegg.p$greater[,"p.val"]) | |
path.ids <- rownames(kegg.p$greater)[sel] | |
sel.l <- kegg.p$less[, "p.val"] < 0.05 & !is.na(kegg.p$less[,"p.val"]) | |
path.ids.l <- rownames(kegg.p$less)[sel.l] | |
path.ids2 <- substr(c(path.ids, path.ids.l), 1, 8) | |
pv.out.list <- sapply(path.ids2, function(pid) pathview(gene.data = cnts.d, pathway.id = pid, species = "bta", kegg.native = T)) | |
######################################## | |
# Gene Ontology | |
## GO gene sets for Human | |
go.gs=go.gsets(species="Human") | |
go.bp=go.gs$go.sets[go.gs$go.subs$BP] | |
go.cc=go.gs$go.sets[go.gs$go.subs$CC] | |
go.mf=go.gs$go.sets[go.gs$go.subs$MF] | |
foldchanges = res$log2FoldChange | |
names(foldchanges) = res$entrez | |
head(foldchanges) | |
go.p<-gage(foldchanges, gsets=go.bp, same.dir=T, compare='unpaired') | |
head(go.p$greater[,c(3,4)]) | |
head(go.p$less[,c(3,4)]) | |
go_ampl <- go.p$greater[,3:4] | |
go_ampl_sig <- subset(go_ampl,go_ampl[,1] <= 0.05) | |
dim(go_ampl_sig) | |
head(go_ampl_sig) | |
tail(go_ampl_sig) | |
write.csv(go_ampl_sig,"gage_GO_BP_LUSC_SETDB1_Top100.csv") | |
go_dipl <- go.p$less[,3:4] | |
go_dipl_sig <- subset(go_dipl, go_dipl[,1] <= 0.05) | |
dim(go_dipl_sig) | |
head(go_dipl_sig) | |
tail(go_dipl_sig) | |
write.csv(go_dipl_sig,"gage_GO_BP_LUSC_SETDB1_Bot100.csv") | |
############## | |
foldchanges = data.frame(res$entrez, res$log2FoldChange, res$padj) | |
res.ampl <- subset(foldchanges, res.padj<=.05 & res.log2FoldChange >= 1) | |
res.dipl <- subset(foldchanges, res.padj<=.05 & res.log2FoldChange <= -1) | |
res.ampl <- subset(foldchanges,res.log2FoldChange >= 1) | |
res.dipl <- subset(foldchanges,res.log2FoldChange <= -1) | |
head(res.dipl) | |
terms <- read.csv("terms", header = F) | |
for (t in terms[,1]){ | |
print(t) | |
glist<-data.frame(unlist(go.bp[t])) | |
colnames(glist) <- "entrez" | |
dim(glist) | |
dim(res.ampl) | |
overlapGenes <- merge(glist, foldchanges, by.x="entrez",by.y="res.entrez") | |
overlapGenes2 <- subset(overlapGenes, res.padj <= 0.05) | |
genes <- mapIds(org.Hs.eg.db, | |
keys=as.character(overlapGenes2[,1]), | |
keytype="ENTREZID", | |
column="SYMBOL") | |
genes <- paste(genes,collapse=",") | |
print(genes) | |
# output[[t]] <- paste(t,genes, sep=";") | |
} | |
output | |
######################################## | |
# MSigDB | |
# MSigDB gene sets - GO_BP_symbol | |
filename=("msigdb.v6.1.symbols.gmt") | |
msig.gs=readList(filename) | |
foldchanges = res$log2FoldChange | |
names(foldchanges) = res$symbol | |
head(foldchanges) | |
ampl.LUSC <- cntNorm[,1:74] | |
dipl.LUSC <- cntNorm[,75:206] | |
msig.p<-gage(foldchanges, gsets=msig.gs, same.dir=T, compare='unpaired') | |
msig.greater <- msig.p$greater[,c(3,4)] | |
go.msig.greater <- msig.greater[grep("GO_", rownames(msig.greater)),] | |
sig.go.msig.greater <- subset(go.msig.greater, go.msig.greater[,2] <= 0.05) | |
dim(sig.go.msig.greater) | |
head(sig.go.msig.greater,20) | |
msig.less <- msig.p$less[,c(3,4)] | |
go.msig.less <- msig.less[grep("GO_", rownames(msig.less)),] | |
sig.go.msig.less <- subset(go.msig.less, go.msig.less [,2] <= 0.05) | |
dim(sig.go.msig.less) | |
head(sig.go.msig.less,20) | |
write.csv(sig.go.msig.greater,"gage_MSig_BP_Setdb1_Top100.csv") | |
write.csv(sig.go.msig.less,"gage_MSig_BP_Setdb1_Bot100.csv") | |
######################## | |
foldchanges = data.frame(res$symbol, res$log2FoldChange, res$padj) | |
head(foldchanges) | |
#res.ampl <- subset(foldchanges, res.padj<=.05 & res.log2FoldChange >= 1) | |
#res.dipl <- subset(foldchanges, res.padj<=.05 & res.log2FoldChange <= -1) | |
res.ampl <- subset(foldchanges, res.padj<=.05) | |
res.dipl <- subset(foldchanges, res.padj<=.05) | |
#res.ampl <- subset(foldchanges,res.log2FoldChange >= 1) | |
#res.dipl <- subset(foldchanges,res.log2FoldChange <= -1) | |
terms <- row.names(sig.go.msig.greater) | |
terms <- row.names(sig.go.msig.less) | |
output <- list() | |
for (t in terms){ | |
# print(t) | |
glist<-data.frame(unlist(msig.gs[t])) | |
colnames(glist) <- "symbol" | |
overlapGenes <- merge(glist, res.ampl, by.x="symbol",by.y="res.symbol") | |
genes <- paste(overlapGenes$symbol,collapse=",") | |
# print(genes) | |
output[[t]] <- paste(genes, sep=";") | |
} | |
write.csv(data.frame(unlist(output)),"gage_MSig_ampl.csv") | |
############################################################################### | |
# survival plot | |
surv <- read.csv("Clinical.Setdb1.Top100.Bot100.csv", header = T) | |
surv2 <- surv[,c(8,11,12)] | |
setdb1 <- c(rep("top100",116), rep("bot100",118)) | |
surv <- read.csv("Clinical.Setdb1.Top100.Bot100.uniq.csv", header = T) | |
head(surv) | |
tail(surv) | |
surv2 <- surv[,c(2:4)] | |
head(surv2) | |
dim(surv2) | |
setdb1 <- c(rep("top100",99), rep("bot100",100)) | |
dta <- data.frame(surv2, setdb1) | |
head(dta) | |
tail(dta) | |
TCGAanalyze_survival(dta, clusterCol="setdb1",risk.table = FALSE, | |
height = 4, width = 6, dpi = 300, | |
conf.int = T, xlim = 4000, | |
color = c("blue","red")) | |
############################################################################### | |
# TE / Epi-drivers - PCA/heatmaps | |
te <- "ERVL.3kb" | |
dta <- read.csv( paste("cntNorm.LUSC.Setdb1.Top100.Bot100.Ensembl_Genes.TSS.5kb.",te,".csv",sep=""), row.names = 1, header=F) | |
dta <- read.csv("cntNorm.LUSC.Setdb1.Top100.Bot100.EpiDrivers.csv", row.names = 1, header=F) | |
head(dta[,1:5]) | |
nGenes <- nrow(dta) | |
mean_bot100 <- rowMeans(dta[,101:200]) | |
fc <- log2(dta + 1) - log2(mean_bot100 + 1) | |
my_palette <- colorRampPalette(c("steelblue", "lightyellow", "red")) | |
breaks = unique(c(seq(-2,0,length=75), seq(0,2,length=75))) | |
heatmap.2(as.matrix(fc),density="none",trace="none", col="bluered", breaks=breaks, | |
Rowv = T, Colv = T, dendrogram="row", margins = c(8,8), | |
ColSideColors = c(rep("orange",100),rep("darkgreen",100)), | |
RowSideColors = c(rep("white",95),"black",rep("white",63)), | |
main = paste("EpiDrivers (",nGenes,")",sep="")) | |
main = paste("TSS < 5kb,\n",te," (",nGenes,")",sep="")) | |
# PCA | |
dta <- read.csv("cntNorm.LUSC.Setdb1.Top100.Bot100.EpiDrivers.csv", row.names = 1, header=F) | |
head(dta[,1:5]) | |
dta.sorted <- dta[order(rowMeans(dta),decreasing=T), ] | |
head(dta.sorted[,1:5]) | |
pc <- prcomp(t(dta.sorted[1:100,])) | |
autoplot(pc, data= sampleInfo, | |
colour="conds", alpha=.5, | |
size=2) + theme_bw() | |
coord_cartesian(xlim = c(-0.05, 0.1), ylim = c(-0.1,0.1)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment