Created
January 23, 2015 19:49
-
-
Save donkirkby/859fad4b5d999f789e4b to your computer and use it in GitHub Desktop.
Convert aligned reads CSV to FASTA format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Convert an aligned reads CSV file to FASTA format.

Rows of the CSV are grouped by their ``refname`` column.  Without ``--out``
the script only prints each reference name it encounters; with ``--out BASE``
it writes one ``BASE.<refname>.fasta`` file per group and prints each file
name as it is created.
"""
import argparse
import csv
import itertools
from operator import itemgetter

# Maximum number of sequence characters written per FASTA line.
LINE_WIDTH = 75


def _parse_args(argv=None):
    """Parse command-line arguments (``argv=None`` means ``sys.argv[1:]``)."""
    parser = argparse.ArgumentParser()
    # Plain 'r' instead of 'rU': the 'U' flag is deprecated in Python 3 and
    # rejected by open() from Python 3.11 on.
    parser.add_argument('aligned',
                        type=argparse.FileType('r'),
                        help='aligned reads CSV file')
    parser.add_argument('--out',
                        '-o',
                        help='base file name for output FASTA files (if not ' +
                             'provided, a list of seeds is printed)')
    return parser.parse_args(argv)


def _write_fasta(rows, writer, width=LINE_WIDTH):
    """Write each CSV row as one FASTA record, wrapped at ``width`` columns.

    Each row must provide the ``rank``, ``count`` and ``seq`` keys.  The
    record header is ``>r<rank>n<count>`` and every ``n`` in the sequence is
    replaced by ``-``.
    """
    for row in rows:
        count = int(row['count'])
        writer.write('>r{}n{}\n'.format(row['rank'], count))
        # NOTE(review): the sequence is left-padded with `count` dashes, as
        # in the original script.  Padding by the read count looks unusual;
        # confirm whether a different column (e.g. an offset) was intended.
        seq = ('-' * count) + row['seq'].replace('n', '-')
        for start in range(0, len(seq), width):
            writer.write(seq[start:start + width] + '\n')


def main(argv=None):
    """Run the conversion.

    ``argv`` is the list of command-line arguments; ``None`` means use
    ``sys.argv[1:]``.  The input file is closed before returning.
    """
    args = _parse_args(argv)
    try:
        reader = csv.DictReader(args.aligned)
        # groupby() only merges *consecutive* rows with the same refname, so
        # a refname that reappears later in the file starts a new group and
        # its output file is overwritten.
        for refname, rows in itertools.groupby(reader, itemgetter('refname')):
            if not args.out:
                print(refname)
                continue
            filename = "{}.{}.fasta".format(args.out, refname)
            print(filename)
            with open(filename, 'w') as writer:
                _write_fasta(rows, writer)
    finally:
        args.aligned.close()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment