mbk0asis · April 27, 2018 05:57
diff --git a/mean_expression_deepTools b/mean_expression_deepTools
 # deepTools - command line
 # Use "--outFileNameMatrix" option to export data set (data are already oriented by strand)
    
 $ computeMatrix scale-regions -p 40 -S low/*.bw -R ~/00-NGS/SETDB1_TCGA/GSE40419/promoter.10kb.bed 
 -o SETDB1.low.Matrix.gz --skipZeros -bs 50 
 -a 1000 -b 1000 --regionBodyLength 5000 
 -bl ~/00-NGS/SETDB1_TCGA/GSE40419/blackList_LINE.LTR 
 --outFileNameMatrix SETDB1.low.Matrix.tab


 # separate header and the data
 $ tail -n +4 SETDB1.low.Matrix.tab | sed 's/\t/,/g' > SETDB1.low.Matrix.csv
 $ grep -v "#" SETDB1.low.Matrix.tab | head -n1 | sed 's/\t/,/g' > SETDB1.low.Header.csv


 # count the number bins per sample
 $ grep -v "#" SETDB1.low.Matrix.tab | head -n1 | sed 's/\t/\n/g' | sort | uniq -c


 #################################################
 # R code for plotting the mean coverage
 #################################################
 library(reshape2)

 setwd('~/00-NGS/SETDB1_TCGA/GSE40419/deepTools')

 bin=140       # number of bins counted 
 samp.cnt=20   # number of samples   
 bin <- rep(c(1:bin),samp.cnt)  # list of bin indexes by each sample
 #################################################

 high.dta <- read.csv("high.matrix.csv", header=F)
 low.dta <- read.csv("low.matrix.csv", header=F)

 high.header <- read.csv("high.header.csv", header=F)
 low.header <- read.csv("low.header.csv", header=F)

 t.high <- t(high.dta)
 t.low <- t(low.dta)

 high.mean <- apply(t.high,1,mean, na.rm=T) # mean expression of all genes or elements
 low.mean <- apply(t.low,1,mean, na.rm=T)   # mean expression of all genes or elements

 #################################################
 # mean expression levels of Setdb1 high samples
 df.high <- cbind.data.frame(t(high.header),as.matrix(high.mean))
 colnames(df.high) <- c("sample","average")
 df.high$bin <- bin
 # de-melt data frame (dcast)
 dfw.high <- dcast(df.high, sample~bin, value.var="average")
 high.mean2 <- apply(dfw.high[,-1],2,mean,na.rm=T)   # mean expression of all samples
 head(high.mean2)

 # mean expression levels of Setdb1 lwo samples
 df.low <- cbind.data.frame(t(low.header),as.matrix(low.mean))
 colnames(df.low) <- c("sample","average")
 df.low$bin <- bin
 # de-melt data frame (dcast)
 dfw.low <- dcast(df.low, sample~bin, value.var="average")
 low.mean2 <- apply(dfw.low[,-1],2,mean,na.rm=T)   # mean expression of all samples
 head(low.mean2)

 #################################################
 # line plots
 plot(high.mean2,type="l",col="red", lty=1, lwd=3, ylim=c(20,120),
     xlab="bin", ylab="mean coverage")
 lines(low.mean2,col="blue", lty=1, lwd =3)
 legend(1, 110, legend=c("Setdb1 high", "Setdb1 low"),
       col=c("red", "blue"), lwd=3, box.lty=0, cex=1.2)
	# deepTools - command line
	# Use "--outFileNameMatrix" option to export data set (data are already oriented by strand)

	$ computeMatrix scale-regions -p 40 -S low/*.bw -R ~/00-NGS/SETDB1_TCGA/GSE40419/promoter.10kb.bed
	-o SETDB1.low.Matrix.gz --skipZeros -bs 50
	-a 1000 -b 1000 --regionBodyLength 5000
	-bl ~/00-NGS/SETDB1_TCGA/GSE40419/blackList_LINE.LTR
	--outFileNameMatrix SETDB1.low.Matrix.tab


	# separate header and the data
	$ tail -n +4 SETDB1.low.Matrix.tab \| sed 's/\t/,/g' > SETDB1.low.Matrix.csv
	$ grep -v "#" SETDB1.low.Matrix.tab \| head -n1 \| sed 's/\t/,/g' > SETDB1.low.Header.csv


	# count the number bins per sample
	$ grep -v "#" SETDB1.low.Matrix.tab \| head -n1 \| sed 's/\t/\n/g' \| sort \| uniq -c


	#################################################
	# R code for plotting the mean coverage
	#################################################
	library(reshape2)

	setwd('~/00-NGS/SETDB1_TCGA/GSE40419/deepTools')

	bin=140 # number of bins counted
	samp.cnt=20 # number of samples
	bin <- rep(c(1:bin),samp.cnt) # list of bin indexes by each sample
	#################################################

	high.dta <- read.csv("high.matrix.csv", header=F)
	low.dta <- read.csv("low.matrix.csv", header=F)

	high.header <- read.csv("high.header.csv", header=F)
	low.header <- read.csv("low.header.csv", header=F)

	t.high <- t(high.dta)
	t.low <- t(low.dta)

	high.mean <- apply(t.high,1,mean, na.rm=T) # mean expression of all genes or elements
	low.mean <- apply(t.low,1,mean, na.rm=T) # mean expression of all genes or elements

	#################################################
	# mean expression levels of Setdb1 high samples
	df.high <- cbind.data.frame(t(high.header),as.matrix(high.mean))
	colnames(df.high) <- c("sample","average")
	df.high$bin <- bin
	# de-melt data frame (dcast)
	dfw.high <- dcast(df.high, sample~bin, value.var="average")
	high.mean2 <- apply(dfw.high[,-1],2,mean,na.rm=T) # mean expression of all samples
	head(high.mean2)

	# mean expression levels of Setdb1 lwo samples
	df.low <- cbind.data.frame(t(low.header),as.matrix(low.mean))
	colnames(df.low) <- c("sample","average")
	df.low$bin <- bin
	# de-melt data frame (dcast)
	dfw.low <- dcast(df.low, sample~bin, value.var="average")
	low.mean2 <- apply(dfw.low[,-1],2,mean,na.rm=T) # mean expression of all samples
	head(low.mean2)

	#################################################
	# line plots
	plot(high.mean2,type="l",col="red", lty=1, lwd=3, ylim=c(20,120),
	xlab="bin", ylab="mean coverage")
	lines(low.mean2,col="blue", lty=1, lwd =3)
	legend(1, 110, legend=c("Setdb1 high", "Setdb1 low"),
	col=c("red", "blue"), lwd=3, box.lty=0, cex=1.2)
No results found