Forked from bluegenes/dammit_gff3_to_trinity_names.py
Last active
December 12, 2018 17:29
-
-
Save johnsolk/d40b5d59fb7b0453f5a55a753ace6af9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###################################################################
"""Function: Take in a dammit gff3, dammit fasta, dammit namemap.
Output gff3 and fasta with trinity names.
by Dr. Tessa Pierce
"""
###################################################################
import sys | |
import os | |
import argparse | |
import screed | |
def dammit_to_trin(dammit_fasta, dammit_namemap, outdir, dammit_gff3, outFasta, outGFF):
    """Write copies of a dammit FASTA and GFF3 that use the original names.

    The namemap is read into a dict keyed by the renamed (dammit) identifier.
    Each FASTA record and each GFF3 feature line then has its leading dammit
    identifier replaced by the mapped original name.

    Args:
        dammit_fasta: Path of the FASTA file to read (opened with screed).
        dammit_namemap: Path of the namemap; every line is split at its last
            comma into (original name, dammit name).
        outdir: Accepted for interface compatibility; not used here.
        dammit_gff3: Path of the GFF3 file to read.
        outFasta: Path of the FASTA file to write.
        outGFF: Path of the GFF3 file to write.

    Raises:
        KeyError: If a FASTA or GFF3 identifier is missing from the namemap.
        ValueError: If a namemap line has no comma, or a GFF3 feature line
            has no tab.
    """
    with open(dammit_namemap, 'r') as f:
        # dammit name -> first whitespace-delimited token of the original name.
        dammit2trin = {y: x.split(' ')[0] for x, y in split_more(f)}

    with open(outFasta, 'w') as outF, screed.open(dammit_fasta) as seqs:
        for read in seqs:
            # partition() tolerates headers that carry no description, where
            # a two-way unpack of split(' ', 1) would raise ValueError.
            dammit_name, _, rest = read.name.partition(' ')
            # NOTE(review): [1:] drops the first character of the mapped
            # name -- presumably a leading quote from the namemap; confirm.
            trinName = dammit2trin[dammit_name][1:]
            header_parts = [trinName, 'dammit=' + dammit_name]
            if rest:
                header_parts.append(rest)
            outF.write('>' + ' '.join(header_parts) + '\n' + read.sequence + '\n')

    with open(outGFF, 'w') as outG, open(dammit_gff3, 'r') as gff:
        # The first line is skipped, as before (presumably the
        # "##gff-version" header -- confirm). The default keeps an empty
        # input file from raising StopIteration.
        next(gff, None)
        for line in gff:
            # Comment/directive and blank lines name no sequence; copy them
            # through untouched instead of failing on the unpack or lookup.
            if line.startswith('#') or not line.strip():
                outG.write(line)
                continue
            dammit_name, rest_gff_line = line.split('\t', 1)
            trinName = dammit2trin[dammit_name][1:]
            outG.write(trinName + '\t' + rest_gff_line)
# Small generator that "simplifies" reading the namemap into a dict.
def split_more(file_object):
    """Yield each non-blank line of *file_object* split at its last comma.

    Args:
        file_object: Any iterable of text lines (for example an open file).

    Yields:
        A list from ``rsplit(',', 1)`` on the whitespace-stripped line: two
        items when the line contains a comma, one item otherwise. Splitting
        from the right keeps commas inside the first field intact.
    """
    for line in file_object:
        stripped = line.strip()
        # A blank line would produce [''], which cannot be unpacked into a
        # (first, second) pair by callers, so skip it.
        if not stripped:
            continue
        yield stripped.rsplit(',', 1)
if __name__ == '__main__':
    # Command-line entry point: take a dammit fasta, a dammit namemap and a
    # dammit gff3, and write a fasta and a gff3 that use the original names.
    psr = argparse.ArgumentParser()
    psr.add_argument('dammit_fasta')
    # The next four values are handed directly to open() by dammit_to_trin,
    # so a missing one can never work; have argparse report it clearly
    # instead of failing later with a TypeError.
    psr.add_argument('--dammit_namemap', required=True)
    psr.add_argument('--dammit_gff3', required=True)
    # Parsed and forwarded for compatibility; dammit_to_trin does not use it.
    psr.add_argument('-o', '--outdir', default=os.getcwd())
    psr.add_argument('--outFasta', required=True)
    psr.add_argument('--outGFF', required=True)
    args = psr.parse_args()
    dammit_to_trin(
        args.dammit_fasta,
        args.dammit_namemap,
        args.outdir,
        args.dammit_gff3,
        args.outFasta,
        args.outGFF,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment