Skip to content

Instantly share code, notes, and snippets.

@arq5x
Last active December 31, 2015 01:19
Show Gist options
  • Save arq5x/7912989 to your computer and use it in GitHub Desktop.
Save arq5x/7912989 to your computer and use it in GitHub Desktop.
Bedtools protocols.
# /home/arq5x/cphg-home/projects/bedtools-curr-prot-bx
#######################################################
# 1. Create subsamples of BAM file and convert to BED.
#######################################################
bedtools bamtobed -i ~/cphg-quinlan/projects/rs-exome/bam/1478PC0009B.conc.on.pos.bam | \
cut -f 1-3 \
> datasets/sample.100M.bam.bed &
samtools view -us 0.10 ~/cphg-quinlan/projects/rs-exome/bam/1478PC0009B.conc.on.pos.bam | \
bedtools bamtobed -i - | \
cut -f 1-3 \
> datasets/sample.10M.bam.bed &
samtools view -us 0.01 ~/cphg-quinlan/projects/rs-exome/bam/1478PC0009B.conc.on.pos.bam | \
bedtools bamtobed -i - | \
cut -f 1-3 \
> datasets/sample.1M.bam.bed &
#######################################################
# 2. Collect run times for 2.16, 2.17, 2.18, and bedops
#######################################################
pwd
/if2/arq5x/cphg-quinlan/projects/bedtools-curr-prot-bx
# bedtools2 -sorted (bedtools-s)
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.10K.bam.bed -c -sorted > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.100K.bam.bed -c -sorted > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.1M.bam.bed -c -sorted > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.10M.bam.bed -c -sorted > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.100M.bam.bed -c -sorted > /dev/null
# bedtools2 (bedtools-u)
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.10K.bam.bed -c > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.100K.bam.bed -c > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.1M.bam.bed -c > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.10M.bam.bed -c > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedtools intersect -a datasets/ccds.exons.bed -b datasets/sample.100M.bam.bed -c > /dev/null
# bedmap (bedmap)
# bedmap requires sort -k1,1 -k2,2n -k3,3n
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.10K.bam.bedmap.bed > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.100K.bam.bedmap.bed > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.1M.bam.bedmap.bed > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.10M.bam.bedmap.bed > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.100M.bam.bedmap.bed > /dev/null
# bedmap with error checking (bedmap-e
# bedmap requires sort-bed
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --ec --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.10K.bam.bedmap.bed > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --ec --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.100K.bam.bedmap.bed > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --ec --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.1M.bam.bedmap.bed > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --ec --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.10M.bam.bedmap.bed > /dev/null
/if2/arq5x/cphg-quinlan/shared/bin/runit /if2/arq5x/cphg-quinlan/shared/bin/bedmap --ec --echo --count --bp-ovr 1 datasets/ccds.exons.bedmap.bed datasets/sample.100M.bam.bedmap.bed > /dev/null
tool time mem size
bedtools-s 0.65 2.2 1e4
bedtools-s 0.86 2.2 1e5
bedtools-s 1.25 2.2 1e6
bedtools-s 5.30 3.5 1e7
bedtools-s 47.81 15.9 1e8
bedtools-u 0.87 13.5 1e4
bedtools-u 1.06 52.1 1e5
bedtools-u 2.06 454.0 1e6
bedtools-u 13.6 44042.2 1e7
bedtools-u NA NA 1e8
bedops 0.81 1.8 1e4
bedops 0.95 2.6 1e5
bedops 1.32 2.8 1e6
bedops 6.26 3.2 1e7
bedops 60.69 6.00 1e8
bedops-e 1.13 0.8 1e4
bedops-e 1.39 0.8 1e5
bedops-e 2.62 0.9 1e6
bedops-e 16.05 1.2 1e7
bedops-e 164.18 7.9 1e8
setwd('/Users/arq5x/Documents/Career/QuinlanArticles/2014/bedtools_curr_protocols_bx/protocols/speed')
library(ggplot2)
theme_set(theme_bw(18))
tool_comparo <- read.table("speed-comparo.txt",header=TRUE)
bts <- subset(tool_comparo, tool %in% c("bedtools-s"))
btu <- subset(tool_comparo, tool %in% c("bedtools-u"))
bor <- subset(tool_comparo, tool %in% c("bedops"))
boe <- subset(tool_comparo, tool %in% c("bedops-e"))
plot(bts[,4], bts[,2], xaxt = "n", yaxt="n",
log="x", 'b', col="darkred",
lwd=3, ylim=c(0,180),
xlab="Number of BAM alignments",
ylab="Time in seconds")
points(btu[,4], btu[,2], xaxt = "n", 'b', col="darkred", lwd=3, lty=3)
points(bor[,4], bor[,2], xaxt = "n", 'b', col="darkgray", lwd=3)
points(boe[,4], boe[,2], xaxt = "n", 'b', col="darkgray", lwd=3, lty=3)
axis(2, at=c(0,10,20,40,60,120,180), labels=c(0,10,20,40,60,120,180))
axis(1, at=c(1e4,1e5,1e6,1e7,1e8), labels=c("1e4","1e5","1e6","1e7","1e8"))
# add a legend.
legend('topleft', c("bedtools (sorted)","bedtools (unsorted)", "bedops", "bedops w/ error checking"),
lwd=c(3,3,3,3), lty=c(1,3,1,3), col=c('darkred', 'darkred', 'darkgray', 'darkgray'), bty='n')
# memory plot
plot(bts[,4], log10(bts[,3]), xaxt = "n", yaxt="n",
log="x", 'b', col="darkred",
lwd=3, ylim=c(0,6),
xlab="Number of BAM alignments",
ylab="RAM")
points(btu[,4], log10(btu[,3]), xaxt = "n", 'b', col="darkred", lwd=3, lty=3)
points(bor[,4], log10(bor[,3]), xaxt = "n", 'b', col="darkgray", lwd=3)
points(boe[,4], log10(boe[,3]), xaxt = "n", 'b', col="darkgray", lwd=3, lty=3)
axis(2, at=c(0,1,2,3,4,5), labels=c(0,"10Mb","100Mb","1Gb","10Gb","100Gb"))
axis(1, at=c(1e4,1e5,1e6,1e7,1e8), labels=c("1e4","1e5","1e6","1e7","1e8"))
# add a legend.
legend('topleft', c("bedtools (sorted)","bedtools (unsorted)", "bedops", "bedops w/ error checking"),
lwd=c(3,3,3,3), lty=c(1,3,1,3), col=c('darkred', 'darkred', 'darkgray', 'darkgray'), bty='n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment