Created
March 5, 2013 04:40
-
-
Save bpow/5088067 to your computer and use it in GitHub Desktop.
merge together bam files, optionally remapping sample names and optionally providing a single chromosome of output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
JAVA_OPTS="-Dgroovy.grape.report.downloads=true" //usr/bin/env groovy "$0" $@; exit $? | |
@GrabResolver(name="maven-sandbox-picard", root="https://github.com/bpow/maven-sandbox/raw/master/picard") | |
@Grab(group="net.sf.picard", module="picard", version="1.86-SNAPSHOT") | |
import net.sf.samtools.* | |
import net.sf.picard.sam.* | |
def mergeAndRemap(chromosome, mapfile, infiles, outfile) { | |
readers = infiles.collect{ new SAMFileReader(new File(it)) } | |
headers = readers*.getFileHeader() | |
headerMerger = new SamFileHeaderMerger(headers[0].sortOrder, headers, true) | |
mergedHeader = headerMerger.mergedHeader | |
if (mapfile) { | |
remaps = [:] | |
new File(mapfile).eachLine { | |
kv = it.split('\t') | |
remaps[kv[0]] = kv[1] | |
} | |
mergedHeader.readGroups.each { rg -> | |
if (remaps.containsKey(rg.sample)) { | |
rg.sample = remaps[rg.sample] | |
} | |
} | |
} | |
SAMFileWriterFactory.setDefaultCreateIndexWhileWriting(true) | |
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(mergedHeader, true, new File(outfile)) | |
if (chromosome) { | |
queries = [:] | |
readers.each { queries[it] = it.queryOverlapping(chromosome, 0, 0) } | |
iterator = new MergingSamRecordIterator(headerMerger, queries, true) | |
} else { | |
iterator = new MergingSamRecordIterator(headerMerger, readers, true) | |
} | |
long readsProcessed = 0 | |
long startTime = System.currentTimeMillis() | |
iterator.each { | |
writer.addAlignment(it) | |
if (readsProcessed++ % 1000000 == 0) { | |
System.err.println("${it.referenceName}:${it.alignmentStart}, ${readsProcessed-1} reads completed, total time (minutes): ${(System.currentTimeMillis() - startTime)/ 60000.0}") | |
} | |
} | |
} | |
def cli = new CliBuilder(usage: 'mergeAndRemapSamples [options] [bamfiles]') | |
cli.with { | |
c longOpt: 'chromosome', args: 1, required: false, 'Chromosome to include in the output' | |
o longOpt: 'output', args: 1, required: false, 'File to output (default = /dev/stdout)' | |
m longOpt: 'map', args: 1, required: false, 'File containing tab-delimited remapping of sample names' | |
h longOpt: 'help', required: false, 'Print command help (hey, you just did that!)' | |
} | |
def opt = cli.parse(args) | |
if (opt.h) { | |
cli.usage() | |
} else { | |
def outfile = opt.o ? opt.o : '/dev/stdout' | |
mergeAndRemap(opt.c, opt.m, opt.arguments(), outfile) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment