Created
April 17, 2013 06:21
-
-
Save cldotdev/5402166 to your computer and use it in GitHub Desktop.
fixname - Fix hit name in the blastlist (standalone)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # | |
| # fixname - Fix hit name in the blastlist | |
| # | |
| # Copyright (C) 2013, Jian-Long Huang | |
| # Licensed under The MIT License | |
| # http://opensource.org/licenses/MIT | |
| # | |
| # Author: Jian-Long Huang ([email protected]) | |
| # Version: 0.1 | |
| # Created: 2013.1.30 | |
| # | |
| # Usage: <input.blastlist> [options] | |
| # | |
| # Options: | |
| # -o, --output STR: output file name. If this option is not specified, the script generates | |
| # one from the input file name plus a random identifier (<input>_out_<ID>.fix). | |
| # | |
| # This script replaces the hit name generated by the makeblastdb tool with the NCBI accession name. | |
| import sys | |
| import argparse | |
| import re | |
| import string | |
| import random | |
def genid(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random identifier of ``size`` characters drawn from ``chars``.

    Args:
        size: Number of characters in the identifier.
        chars: Pool of characters to draw from (uppercase ASCII letters and
            digits by default).

    Returns:
        A string of length ``size``; the empty string when ``size`` is 0.

    Raises:
        IndexError: If ``chars`` is empty and ``size`` is greater than 0.
    """
    # The module-level generator is already seeded from the OS at import
    # time.  Reseeding it here on every call would silently discard any seed
    # the caller installed, so the shared generator state is left alone.
    return ''.join(random.choice(chars) for _ in range(size))
def main():
    """Rewrite the hit-name columns of a tab-separated blastlist file.

    Reads the file named on the command line and writes the result to the
    file given with ``-o/--output`` (or to ``<input>_out_<ID>.fix`` when the
    option is omitted).  Blank lines and lines whose first non-blank
    character is ``#`` or ``a`` are copied through unchanged.  For every
    other line, the text of column 26 (0-based) has its first
    whitespace-delimited token removed, and the token that follows it is
    also stored in column 4.

    Raises:
        SystemExit: With status 1 when a record has fewer than 27 columns or
            when column 26 does not match the expected
            ``<token> <name> <rest>`` layout.
    """
    parser = argparse.ArgumentParser(description='fixname - Fix hit name in the blastlist')
    parser.add_argument('input_file')
    parser.add_argument('-o', '--output', dest='output_file',
                        help='output file name. If this option is not specified, the script will generate '
                             'one with unique identifier at current directory.')
    args = parser.parse_args()

    if args.output_file is None:
        args.output_file = args.input_file + '_out_' + genid() + '.fix'

    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    # group(1) is the second whitespace-delimited token, group(2) the rest.
    hitname = re.compile(r'.+?\s(.+?)(\s.+)')
    name_col = 4      # column that receives the extracted name
    source_col = 26   # column the name is extracted from

    with open(args.input_file, 'r') as fin, open(args.output_file, 'w') as fw:
        for linum, line in enumerate(fin, start=1):
            stripped = line.lstrip()
            # NOTE(review): lines starting with 'a' are presumably a
            # column-header row -- confirm against the blastlist format.
            if not stripped or stripped[0] in ('#', 'a'):
                fw.write(line)
                continue

            data = line.split('\t')
            if len(data) <= source_col:
                # Previously this surfaced as an uncaught IndexError.
                print('Line ' + str(linum) + ' has fewer than '
                      + str(source_col + 1) + ' tab-separated columns.',
                      file=sys.stderr)
                print('Please have a check.', file=sys.stderr)
                sys.exit(1)

            match = hitname.match(data[source_col])
            if match is None:
                print('No matched name in line ' + str(linum) + '.', file=sys.stderr)
                print('Please have a check.', file=sys.stderr)
                sys.exit(1)

            fixed = match.group(1) + match.group(2)
            # '.' never matches the line terminator, so it has to be put back
            # when this column ends the record.  When more columns follow,
            # adding a newline here would split the record in two.
            if source_col == len(data) - 1:
                fixed += '\n'

            data[name_col] = match.group(1)
            data[source_col] = fixed
            fw.write('\t'.join(data))
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if "__main__" == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment