-
-
Save jgarces02/39cd7073038a42213641bae644c647de to your computer and use it in GitHub Desktop.
R code to plot fraction of captured target bases over depth for exome experiments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Assumes you've already run coverageBed -hist, and grep'd '^all'. E.g. something like: | |
# find *bam | parallel 'bedtools coverage -hist -b {} -a /home/jgarces/genomes/sureSelect/sureSelect_v5_UTR/S04380219_Covered.bed | grep ^all > coverages/{}.hist.all.txt' | |
# Get a list of the bedtools output files you'd like to read in (all-in-one) | |
#print(files <- list.files(pattern="all.txt$")) | |
# Get a list of the bedtools output files you'd like to read in (by list) | |
args <- commandArgs(trailingOn = TRUE) | |
files <- scan(args[1], what = "", sep = "\n") | |
print(files <- paste0(files, ".hist.all.txt")) | |
# Optional, create short sample names from the filenames. | |
print(labs <- gsub("prefixToTrash-0|_GRCh37_sorted_merged_merged\\.nodups\\.bam\\.hist\\.all\\.txt", "", files, perl = TRUE)) | |
# Create lists to hold coverage and cumulative coverage for each alignment, and read the data into these lists. | |
cov <- list() | |
cov_cumul <- list() | |
for (i in 1:length(files)) { | |
cov[[i]] <- read.table(files[i]) | |
cov_cumul[[i]] <- 1-cumsum(cov[[i]][,5]) | |
} | |
# Pick some colors | |
library(RColorBrewer) | |
cols <- brewer.pal(length(cov), "Dark2") | |
# Save the graph to a file | |
png(paste0("exome-coverage-plots.", args[1], ".png"), h=1000, w=1000, pointsize=20) | |
# Create plot area, but do not plot anything. Add gridlines and axis labels. | |
plot(cov[[1]][2:401, 2], cov_cumul[[1]][1:400], type='n', xlab="Depth", ylab="Fraction of capture target bases \u2265 depth", ylim=c(0,1.0), main="Target Region Coverage") | |
abline(v = c(20,50,80,100), h = c(0.50,0.90), col = "gray60") | |
axis(1, at=c(20,50,80), labels=c(20,50,80)) | |
axis(2, at=c(0.90), labels=c(0.90)) | |
axis(2, at=c(0.50), labels=c(0.50)) | |
# Actually plot the data for each of the alignments (stored in the lists). | |
for (i in 1:length(cov)) points(cov[[i]][2:401, 2], cov_cumul[[i]][1:400], type='l', lwd=3, col=cols[i]) | |
# Add a legend using the nice sample labeles rather than the full filenames. | |
legend("topright", legend=labs, col=cols, lty=1, lwd=4) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment