Created
December 7, 2011 06:47
-
-
Save brantfaircloth/1441757 to your computer and use it in GitHub Desktop.
Convert UCSC refgene to BED
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
File: refgene_to_bed.py | |
Author: Brant Faircloth | |
Created by Brant Faircloth on 06 December 2011 22:12 PST (-0800) | |
Copyright (c) 2011 Brant C. Faircloth. All rights reserved. | |
Description: convert UCSC refgene.txt files to BED format | |
""" | |
import os | |
import sys | |
import numpy | |
import argparse | |
#import pdb | |
def get_args(argv=None):
    """Parse the command-line arguments for the refGene-to-BED converter.

    Args:
        argv: Optional list of argument strings to parse.  When None
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave exactly as before.

    Returns:
        An ``argparse.Namespace`` with two attributes:

        * ``input`` -- the input path exactly as typed, or the
          ``sys.stdin`` object when the positional is omitted.  No type
          conversion is applied, so this is either a string or a stream.
        * ``output`` -- a file opened for writing by
          ``argparse.FileType('w')``, or ``sys.stdout`` when omitted.
    """
    parser = argparse.ArgumentParser(
        description="""Convert UCSC refgene.txt to BED
            format""")
    parser.add_argument(
        'input',
        nargs='?',
        default=sys.stdin,
        help="refGene.txt file to convert (default: standard input)",
    )
    parser.add_argument(
        'output',
        nargs='?',
        type=argparse.FileType('w'),
        default=sys.stdout,
        help="BED file to write (default: standard output)",
    )
    return parser.parse_args(argv)
def get_int_list(l):
    """Convert a comma-separated string of integers into a list of ints.

    Leading/trailing commas and surrounding whitespace are ignored, so a
    value written with a trailing comma such as ``"10,20,30,"`` parses
    cleanly.

    Args:
        l: String of comma-separated integer fields.

    Returns:
        A list of ints, in input order.  An empty (or comma/whitespace
        only) string yields an empty list.

    Raises:
        ValueError: If any remaining field is not a valid integer.
    """
    # Trim whitespace on both sides of the comma trim so that a trailing
    # newline after the final comma does not leave a stray empty field.
    fields = l.strip().strip(',').strip()
    if not fields:
        return []
    return [int(i) for i in fields.split(',')]
def get_string_list(a):
    """Join the elements of an array into a comma-separated string.

    Args:
        a: A numpy array, or any sequence ``numpy.asarray`` can convert
            (list, tuple).  Arrays are passed through without copying.

    Returns:
        The elements converted with ``str`` and joined by commas; an
        empty input yields an empty string.
    """
    # asarray lets plain Python sequences through while leaving existing
    # ndarray callers unchanged; tolist() turns numpy scalars into native
    # Python values before they are converted with str().
    return ','.join([str(i) for i in numpy.asarray(a).tolist()])
def main():
    """Convert each refGene record from the input into one BED12 line.

    Reads tab-separated records, taking chrom, txStart, txEnd, name,
    strand, exonCount, exonStarts and exonEnds from columns 2, 4, 5, 1,
    3, 8, 9 and 10 (0-based) respectively, and writes one BED line per
    record to the output chosen by ``get_args()``.  Score is fixed at
    999, itemRgb at 0, and thickStart/thickEnd repeat the transcript
    start/end.

    Raises:
        IndexError: If a non-blank record has fewer than 11 columns.
        ValueError: If a coordinate field is not an integer.
    """
    args = get_args()
    # get_args() yields a path string when an input is named but the
    # sys.stdin *object* when it is omitted; open() only accepts the
    # former, so use an already-open stream directly.
    input_is_stream = hasattr(args.input, 'read')
    # Plain 'r' replaces 'rU': the 'U' flag was removed in Python 3.11 and
    # text mode already translates newlines on Python 3.
    infile = args.input if input_is_stream else open(args.input, 'r')
    try:
        for line in infile:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            ls = stripped.split('\t')
            starts, stops = get_int_list(ls[9]), get_int_list(ls[10])
            # BED block sizes are exon lengths; block starts are offsets
            # relative to the transcript start (column 4).
            lengths = get_string_list(numpy.array(stops) - numpy.array(starts))
            relstarts = get_string_list(numpy.array(starts) - int(ls[4]))
            outline = "{0}\t{1}\t{2}\t{3}\t999\t{4}\t{1}\t{2}\t0\t{5}\t{6}\t{7}\n".format(
                ls[2],
                ls[4],
                ls[5],
                ls[1],
                ls[3],
                ls[8],
                lengths,
                relstarts
            )
            args.output.write(outline)
    finally:
        # Only close handles this function (or argparse, on its behalf)
        # opened; the process-wide standard streams are left usable.
        if not input_is_stream:
            infile.close()
        if args.output is sys.stdout:
            args.output.flush()
        else:
            args.output.close()
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
I haven't actually tried sending stdin, but will do so this evening. If you haven't seen some of these, you can do other slick stuff with argparse like:
https://gist.github.com/1443543
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Cool. I didn't know about argparse.FileType()
Won't you get an error if input actually is stdin (the default)?