Created
March 8, 2012 17:20
-
-
Save taoliu/2002182 to your computer and use it in GitHub Desktop.
ma2cWigToBedGraph fix MA2C wig output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Time-stamp: <2012-01-10 17:21:24 Tao Liu> | |
# in the wiggle file from MA2C, the 1st column -- position is actually | |
# the center of each probe. Some probes may overlap. In order to | |
# convert them to appropriate bigwig, I need to create bedGraph file | |
# with above two problems fixed. | |
import os | |
import sys | |
# ------------------------------------ | |
# Main function | |
# ------------------------------------ | |
def main(): | |
if len(sys.argv) < 3: | |
sys.stderr.write("""need 2 paras: %s <shift> <MA2C wig file> | |
shift: if the position in wig file is not the leftmost position of the represented region, use this option to fix. For example, if a probe is 50bp, and in wig, 1st column is | |
the middle points of probes, use -25. Otherwise, use 0 | |
**Make sure your variableStep wig file is from MA2C! | |
**In the output, gaps will be filled with 0 and if two regions are overlapped, the later value will be used. | |
""" % sys.argv[0]) | |
sys.exit(1) | |
ssize = int(sys.argv[1]) | |
wigfile = open(sys.argv[2],"r") | |
#fs = bdgfile.readline().rstrip().split() | |
#prev_region = (fs[0],int(fs[1]),int(fs[2]),float(fs[3])) | |
prev_region = None | |
for l in wigfile: | |
if l.startswith("track"): | |
continue | |
if l.startswith("variableStep"): | |
chrom = l.rstrip().split()[1].split("=")[1] | |
span = int(l.rstrip().split()[2].split("=")[1]) | |
continue | |
(pos,value) = l.rstrip().split() | |
startpos = int(pos) + ssize | |
endpos = startpos + span | |
curr_region = (chrom,startpos,endpos,float(value)) | |
if not prev_region: | |
prev_region = curr_region | |
continue | |
if prev_region[0] == curr_region[0]: | |
# only if they are in the same chromosome | |
right_pos_of_prev_region = min(curr_region[1],prev_region[1]+span) | |
if right_pos_of_prev_region > prev_region[1]: | |
print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3]) | |
# fill in gaps with 0 | |
if curr_region[1] > prev_region[1]+span: | |
print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1]+span,curr_region[1],0) | |
else: | |
right_pos_of_prev_region = prev_region[1]+span | |
if right_pos_of_prev_region > prev_region[1]: | |
print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3]) | |
prev_region = curr_region | |
# last region | |
right_pos_of_prev_region = prev_region[1]+span | |
if right_pos_of_prev_region > prev_region[1]: | |
print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3]) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment