Last active
December 13, 2015 19:39
-
-
Save slowkow/4964451 to your computer and use it in GitHub Desktop.
Python snippet: mimic the command `bedtools merge`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
r"""merge_intervals.py

Merge tab-delimited intervals, mimicking the command `bedtools merge`.

Input lines look like ``chrom<TAB>beg<TAB>end[<TAB>...]`` and are read from
the files named on the command line, or from standard input when no file is
given.  Only the first three columns are used.  One merged interval is
printed per line as ``chrom<TAB>beg<TAB>end``.

Intervals on the same chromosome that overlap or touch are combined.
Chromosomes are printed in dictionary iteration order; they are not sorted.

Example (output columns are separated by tabs):

    cat tissue1.txt
    chr1,1-2
    chr1,3-4
    chr1,5-6
    chr1,6-7
    chr1,7-8
    chr1,9-10
    chr2,52,53
    chr2,40-50

    cat tissue2.txt
    chr1,11-15
    chr1,20-23
    chr2,53,57
    chr2,40-50

    python merge_intervals.py <(cat tissue?.txt | tr ,- '\t')
    chr2    40    50
    chr2    52    57
    chr1    1     2
    chr1    3     4
    chr1    5     8
    chr1    9     10
    chr1    11    15
    chr1    20    23
"""
import fileinput
from bx.intervals.cluster import ClusterTree
from collections import defaultdict

# ClusterTree arguments:
# Intervals this far apart are combined into a cluster.
MAX_GAP = 0
# Number of overlapping intervals needed to make a cluster.
MIN_INTERVALS = 1

# One cluster tree per chromosome, created on first use.
trees = defaultdict(lambda: ClusterTree(MAX_GAP, MIN_INTERVALS))

for n, line in enumerate(fileinput.input()):
    # Skip blank lines (e.g. a trailing empty line) instead of crashing on
    # the three-way unpack below.
    if not line.strip():
        continue
    # Only the first three columns matter; int() tolerates the trailing
    # newline when the line has exactly three columns.
    chrom, beg, end = line.split('\t')[:3]
    # The line number serves as a unique id for the interval.
    trees[chrom].insert(int(beg), int(end), n)

for chrom, tree in trees.items():
    # Tuples like: (beg, end, [sorted list of interval ids in the cluster])
    for beg, end, ns in tree.getregions():
        # Join with bare tabs: the old print statement with comma-separated
        # items wrote spaces next to the tabs.  A single-argument print()
        # behaves the same on Python 2 and Python 3.
        print('\t'.join(str(field) for field in (chrom, beg, end)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Check out: