Skip to content

Instantly share code, notes, and snippets.

@nimezhu
Created January 29, 2016 21:50
Show Gist options
  • Save nimezhu/3ef38f353689394eec55 to your computer and use it in GitHub Desktop.
Save nimezhu/3ef38f353689394eec55 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Programmer : zhuxp
# Date:
# Last-modified: 01-29-2016, 16:48:48 EST
from __future__ import print_function
VERSION="0.1"
import os,sys,argparse
from bam2x.Annotation import BED6
from bam2x import TableIO,Tools
from bam2x import IO
import time
from collections import defaultdict
def ParseArg():
''' This Function Parse the Argument '''
p=argparse.ArgumentParser( description = 'Example: %(prog)s -h', epilog='Library dependency : bam2x')
p.add_argument('-v','--version',action='version',version='%(prog)s '+VERSION)
p.add_argument('-i','--input',dest="input",nargs="*",help="input bed6 or bed5(macs output) files")
p.add_argument('-o','--output',dest="output",type=str,default="stdout",help="output file DEFAULT: STDOUT")
p.add_argument('-c','--cutoff',dest="cutoff",type=int,default=0,help="cutoff( 0:union ) (1: intesect)")
#p.add_argument('-n','--num_cpus',dest="num_cpus",type=int,default=4,help="number of cpus DEFAULT: %(default)i")
if len(sys.argv)==1:
print(p.print_help(),file=sys.stderr)
exit(0)
return p.parse_args()
'''
bed union merge bed files and report the union or intersect region (generate the new bed region) .
'''
def Main():
'''
IO TEMPLATE
'''
global args,out
args=ParseArg()
out=IO.fopen(args.output,"w")
for i in args.input:
print(i,file=out)
a=BED6("chr01",100,200,"id",0,".")
b=BED6("chr03",150,250,"id",0,".")
b2=BED6("chr02",150,250,"id",0,".")
b3=BED6("chr01",150,250,"id",0,".")
c=BED6("chr01",255,300,"id",0,".")
d=BED6("chr01",275,350,"id",0,".")
beds=[a,b,c,d,b2,b3]
for i in merge_bed6_set(beds,args.cutoff):
print(str(i))
def merge_bed6_set(beds,cutoff=0,id="noname"):
'''
beds list to merge.
1.assign beds to chromosome
2.report the coord in bed format
'''
k=0;
mbed=assign_bed_to_chrom(beds)
for i in mbed.keys():
b=mbed[i]
for j in _merge_bed6(b,cutoff):
yield BED6(i,j[0],j[1],id+"_"+str(k),cutoff+1,".")
k+=1
def initarray():
return []
def assign_bed_to_chrom(beds):
mbed=defaultdict(initarray);
for b in beds:
mbed[b.chr].append(b);
return mbed
def _merge_bed6(beds,cutoff=0):
'''
a simple turing state change
given a bedlist in same chromosome
report the region > cutoff
> 0 union
> 1 intesect
> 2 ( coverage > 3 region )
'''
l=[]
strand=beds[0].strand
for i in beds:
l.append((i.start,-1))
l.append((i.stop,1))
if strand!=i.strand: strand="."
chr=beds[0].chr
l.sort()
switch=0
start=l[0][0]
end=l[-1][0]
state=0
assert i[0][1] > 1
last_pos=l[0][0]
state=0
switch=0
for i in l[0:]:
state-=i[1]
if switch==1 and state==cutoff:
if last_pos!=i[0]:
yield last_pos,i[0]
switch=0
elif switch==0 and state>cutoff:
last_pos=i[0]
switch=1
assert state==0
#blockCount=len(blockSizes)
#return BED12(chr,start,end,id,0.0,strand,start,start,"0,0,0",blockCount,blockSizes,blockStarts)
if __name__=="__main__":
Main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment