Skip to content

Instantly share code, notes, and snippets.

@bpow
Created March 5, 2013 04:40
Show Gist options
  • Save bpow/5088067 to your computer and use it in GitHub Desktop.
Save bpow/5088067 to your computer and use it in GitHub Desktop.
merge together bam files, optionally remapping sample names and optionally providing a single chromosome of output
#!/bin/bash
JAVA_OPTS="-Dgroovy.grape.report.downloads=true" //usr/bin/env groovy "$0" $@; exit $?
@GrabResolver(name="maven-sandbox-picard", root="https://github.com/bpow/maven-sandbox/raw/master/picard")
@Grab(group="net.sf.picard", module="picard", version="1.86-SNAPSHOT")
import net.sf.samtools.*
import net.sf.picard.sam.*
def mergeAndRemap(chromosome, mapfile, infiles, outfile) {
readers = infiles.collect{ new SAMFileReader(new File(it)) }
headers = readers*.getFileHeader()
headerMerger = new SamFileHeaderMerger(headers[0].sortOrder, headers, true)
mergedHeader = headerMerger.mergedHeader
if (mapfile) {
remaps = [:]
new File(mapfile).eachLine {
kv = it.split('\t')
remaps[kv[0]] = kv[1]
}
mergedHeader.readGroups.each { rg ->
if (remaps.containsKey(rg.sample)) {
rg.sample = remaps[rg.sample]
}
}
}
SAMFileWriterFactory.setDefaultCreateIndexWhileWriting(true)
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(mergedHeader, true, new File(outfile))
if (chromosome) {
queries = [:]
readers.each { queries[it] = it.queryOverlapping(chromosome, 0, 0) }
iterator = new MergingSamRecordIterator(headerMerger, queries, true)
} else {
iterator = new MergingSamRecordIterator(headerMerger, readers, true)
}
long readsProcessed = 0
long startTime = System.currentTimeMillis()
iterator.each {
writer.addAlignment(it)
if (readsProcessed++ % 1000000 == 0) {
System.err.println("${it.referenceName}:${it.alignmentStart}, ${readsProcessed-1} reads completed, total time (minutes): ${(System.currentTimeMillis() - startTime)/ 60000.0}")
}
}
}
def cli = new CliBuilder(usage: 'mergeAndRemapSamples [options] [bamfiles]')
cli.with {
c longOpt: 'chromosome', args: 1, required: false, 'Chromosome to include in the output'
o longOpt: 'output', args: 1, required: false, 'File to output (default = /dev/stdout)'
m longOpt: 'map', args: 1, required: false, 'File containing tab-delimited remapping of sample names'
h longOpt: 'help', required: false, 'Print command help (hey, you just did that!)'
}
def opt = cli.parse(args)
if (opt.h) {
cli.usage()
} else {
def outfile = opt.o ? opt.o : '/dev/stdout'
mergeAndRemap(opt.c, opt.m, opt.arguments(), outfile)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment