Skip to content

Instantly share code, notes, and snippets.

@gregcaporaso
Created July 8, 2012 17:06
Show Gist options
  • Save gregcaporaso/3071800 to your computer and use it in GitHub Desktop.
Save gregcaporaso/3071800 to your computer and use it in GitHub Desktop.
Script to convert a group of MG-RAST files into a single seqs.fna file for use with QIIME
#!/usr/bin/env python
# File created on 26 Jun 2012
from __future__ import division
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.5.0-dev"
__maintainer__ = "Greg Caporaso"
__email__ = "[email protected]"
__status__ = "Development"
from glob import glob
from os.path import split, splitext
from cogent.parse.fasta import MinimalFastaParser
from qiime.util import parse_command_line_parameters, make_option, gzip_open
# Option/usage metadata handed to parse_command_line_parameters() in main().
# The description, usage and output-description entries are empty
# placeholders; only the two required options and the version carry content.
script_info = {
    'brief_description': "",
    'script_description': "",
    'script_usage': [("", "", "")],
    'output_description': "",
    'required_options': [
        # Wildcard pattern selecting the input files.
        make_option('-i', '--input_glob', type="string",
                    help='wildcard statement to match input files'),
        # Path of the combined output file.
        make_option('-o', '--output_fp', type="new_filepath",
                    help='the output filepath'),
    ],
    'optional_options': [],
    'version': __version__,
}
def mgrast_fnas_to_qiime_seqs(input_fna_fps):
    """Yield (label, sequence) pairs built from a series of fasta files.

    input_fna_fps: iterable of filepaths. Each one is opened with
     gzip_open and parsed with MinimalFastaParser.

    The metagenome id of a file is the first two dot-separated fields of
     its file name with the directory stripped, e.g. 'dir/4440281.3.fna.gz'
     gives '4440281.3'.

    Each yielded pair is ('<metagenome_id>_<n> <original seq id>', SEQ),
     where SEQ is the upper-cased sequence and n is a counter that starts
     at 0 and keeps increasing across all input files (it is not reset
     per file).

    This is a generator: files are opened lazily, one at a time, as the
     results are consumed.
    """
    seq_count = 0
    for fp in input_fna_fps:
        metagenome_id = '.'.join(split(fp)[1].split('.')[:2])
        fna_f = gzip_open(fp)
        try:
            for seq_id, seq in MinimalFastaParser(fna_f):
                yield ('%s_%d %s' % (metagenome_id, seq_count, seq_id),
                       seq.upper())
                seq_count += 1
        finally:
            # Release the handle as soon as this file is done (or the
            # generator is abandoned) so that a large glob does not keep
            # every input file open at once.
            fna_f.close()
def main():
    """Write every sequence from the files matching --input_glob to --output_fp.

    Command-line options are parsed according to script_info. The matched
     files are processed in sorted path order and each record produced by
     mgrast_fnas_to_qiime_seqs is written as a two-line fasta entry
     ('>label' followed by the sequence).

    Exits through option_parser.error if the glob matches no files, before
     the output file is created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # glob() returns matches in arbitrary order; sorting keeps the running
    # sequence numbers in the output reproducible from run to run.
    input_fna_fps = sorted(glob(opts.input_glob))
    if not input_fna_fps:
        option_parser.error(
            "No files match the input glob: %s" % opts.input_glob)

    # The with-block guarantees the output is flushed and closed even if
    # reading or parsing one of the inputs raises part-way through.
    with open(opts.output_fp, 'w') as output_f:
        for e in mgrast_fnas_to_qiime_seqs(input_fna_fps):
            output_f.write('>%s\n%s\n' % e)
# Run the conversion only when this file is executed as a script, so that
# importing the module does not trigger command-line parsing.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment