Skip to content

Instantly share code, notes, and snippets.

@indapa
Created December 8, 2010 16:38
Show Gist options
  • Save indapa/733516 to your computer and use it in GitHub Desktop.
Save indapa/733516 to your computer and use it in GitHub Desktop.
determine intervals in file2.bed not covered by the file1.bed, given a length file of sources
#!/usr/bin/python
from optparse import OptionParser
from bx.bitset import *
from bx.bitset_builders import *
def read_len( lengthfh ):
"""Read a 'LEN' file and return a mapping from chromosome to length"""
mapping = dict()
for line in lengthfh:
fields = line.split()
mapping[ fields[0] ] = int( fields[1] )
return mapping
def main():
""" determine intervals in file2.bed not covered by the file1.bed """
usage = "usage: %prog --lenfile=LENGTHFILE file1.bed file2.bed\n\ndetermine intervals in file2.bed not covered by the file1.bed\n"
parser = OptionParser(usage)
parser.add_option("--lenfile", type="string", dest="lengthfile", help="file with source name and bp size, seperated by white space")
(options, args)=parser.parse_args()
try:
bed_first, bed_second = args
except:
print usage
exit(1)
try:
lenfh=open(options.lengthfile)
lens=read_len(lenfh)
except:
print usage
exit(1)
bitsets1 = binned_bitsets_from_file( open( bed_first ) ) #bitsets1 set to 1 for bp in the bed intervals for bedfile1
bitsets2 = binned_bitsets_from_file( open( bed_second ) )#bitsets2 set to 1 for bp in the bed intervals for bedfile2
bitsets = dict()
#invert the bitsets1, so that the bits set to 1 now indicate intervals that are *not* covered by bedfile1
for chrom in lens:
if chrom in bitsets1:
bitsets1[chrom].invert()
len = lens[chrom]
end = 0
while 1:
start = bitsets1[chrom].next_set( end )
if start == bitsets1[chrom].size: break
end = bitsets1[chrom].next_clear( start )
if end > len: end = len
if end == len: break
else:
pass
#which intervals in bedfile2 are overlapping with the intervals not covered by bedfile1
for key in bitsets1:
if key in bitsets2:
bitsets1[key].iand( bitsets2[key] )
bitsets[key] = bitsets1[key]
for chrom in bitsets:
bits = bitsets[chrom]
end = 0
while 1:
start = bits.next_set( end )
if start == bits.size: break
end = bits.next_clear( start )
print "%s\t%d\t%d" % ( chrom, start, end )
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment