Skip to content

Instantly share code, notes, and snippets.

@mbk0asis
Last active April 27, 2018 05:57
Show Gist options
  • Save mbk0asis/19997c59410bb8ab50b524c9251fe560 to your computer and use it in GitHub Desktop.
Save mbk0asis/19997c59410bb8ab50b524c9251fe560 to your computer and use it in GitHub Desktop.
# deepTools - command line
# Use "--outFileNameMatrix" option to export data set (data are already oriented by strand)
$ computeMatrix scale-regions -p 40 -S low/*.bw -R ~/00-NGS/SETDB1_TCGA/GSE40419/promoter.10kb.bed
-o SETDB1.low.Matrix.gz --skipZeros -bs 50
-a 1000 -b 1000 --regionBodyLength 5000
-bl ~/00-NGS/SETDB1_TCGA/GSE40419/blackList_LINE.LTR
--outFileNameMatrix SETDB1.low.Matrix.tab
# separate header and the data
$ tail -n +4 SETDB1.low.Matrix.tab | sed 's/\t/,/g' > SETDB1.low.Matrix.csv
$ grep -v "#" SETDB1.low.Matrix.tab | head -n1 | sed 's/\t/,/g' > SETDB1.low.Header.csv
# count the number bins per sample
$ grep -v "#" SETDB1.low.Matrix.tab | head -n1 | sed 's/\t/\n/g' | sort | uniq -c
#################################################
# R code for plotting the mean coverage
#################################################
library(reshape2)
setwd('~/00-NGS/SETDB1_TCGA/GSE40419/deepTools')
bin=140 # number of bins counted
samp.cnt=20 # number of samples
bin <- rep(c(1:bin),samp.cnt) # list of bin indexes by each sample
#################################################
high.dta <- read.csv("high.matrix.csv", header=F)
low.dta <- read.csv("low.matrix.csv", header=F)
high.header <- read.csv("high.header.csv", header=F)
low.header <- read.csv("low.header.csv", header=F)
t.high <- t(high.dta)
t.low <- t(low.dta)
high.mean <- apply(t.high,1,mean, na.rm=T) # mean expression of all genes or elements
low.mean <- apply(t.low,1,mean, na.rm=T) # mean expression of all genes or elements
#################################################
# mean expression levels of Setdb1 high samples
df.high <- cbind.data.frame(t(high.header),as.matrix(high.mean))
colnames(df.high) <- c("sample","average")
df.high$bin <- bin
# de-melt data frame (dcast)
dfw.high <- dcast(df.high, sample~bin, value.var="average")
high.mean2 <- apply(dfw.high[,-1],2,mean,na.rm=T) # mean expression of all samples
head(high.mean2)
# mean expression levels of Setdb1 lwo samples
df.low <- cbind.data.frame(t(low.header),as.matrix(low.mean))
colnames(df.low) <- c("sample","average")
df.low$bin <- bin
# de-melt data frame (dcast)
dfw.low <- dcast(df.low, sample~bin, value.var="average")
low.mean2 <- apply(dfw.low[,-1],2,mean,na.rm=T) # mean expression of all samples
head(low.mean2)
#################################################
# line plots
plot(high.mean2,type="l",col="red", lty=1, lwd=3, ylim=c(20,120),
xlab="bin", ylab="mean coverage")
lines(low.mean2,col="blue", lty=1, lwd =3)
legend(1, 110, legend=c("Setdb1 high", "Setdb1 low"),
col=c("red", "blue"), lwd=3, box.lty=0, cex=1.2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment