Skip to content

Instantly share code, notes, and snippets.

@seandavi
Created March 11, 2012 01:58
Show Gist options
  • Save seandavi/2014542 to your computer and use it in GitHub Desktop.
Save seandavi/2014542 to your computer and use it in GitHub Desktop.
Split a BAM file by Read Group ID
#!/usr/bin/env python
# split a bam file by read group ID
# Sean Davis <[email protected]>
# March 10, 2012
#
import pysam
import argparse
import logging
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument('filename',help="The bam filename")
opts = parser.parse_args()
infile = pysam.Samfile(opts.filename,'rb')
header = infile.header
readgroups = header['RG']
# remove readgroups from header
del(header['RG'])
outfiles = {}
for rg in readgroups:
tmphead = header
tmphead['RG']=[rg]
logging.info('Creating new BAM file: %s',(rg['ID']+'.bam','wb'))
outfiles[rg['ID']] = pysam.Samfile(rg['ID']+'.bam','wb',header=tmphead)
j=0
for read in infile:
j+=1
idtag = [x[1] for x in read.tags if x[0]=='RG'][0]
if((j % 100000)==0):
logging.info('read and wrote %d records',(j))
outfiles[idtag].write(read)
for f in outfiles.values():
f.close()
infile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment