These files were used while developing pyqi's Getting Started tutorials. See those documents for usage examples.
Created
August 23, 2013 14:54
-
-
Save gregcaporaso/6320252 to your computer and use it in GitHub Desktop.
Example files used while developing pyqi's Getting Started tutorials.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>s1 | |
ACCTTTAACC | |
>s2 | |
CCGG | |
>s3 | |
AAAAAAAAAAAAAAAAAAAAAAAAAAA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import division | |
from pyqi.core.command import Command, Parameter, ParameterCollection | |
__author__ = "Greg Caporaso" | |
__copyright__ = "Copyright 2013, Greg Caporaso" | |
__credits__ = ["Greg Caporaso"] | |
__license__ = "BSD" | |
__version__ = "0.0.1" | |
__maintainer__ = "Greg Caporaso" | |
__email__ = "[email protected]" | |
class SequenceCollectionSummarizer(Command):
    """Summarize a collection of (sequence id, sequence) pairs.

    The summary is the number of sequences plus, unless suppressed, the
    lengths of the shortest and longest sequences.
    """

    BriefDescription = "Generate summary statistics on a collection of sequences."
    LongDescription = "Provided the number of sequences, the minimum sequence length, and the maximum sequence length given a collection of sequences. Sequences should be provided as a list (or generator) of tuples of (sequence id, sequence) pairs."
    Parameters = ParameterCollection([
        Parameter(Name='seqs', DataType=list,
                  Description='sequences to be summarized', Required=True),
        Parameter(Name='suppress_length_summary', DataType=bool,
                  Description='do not generate summary information on the sequence lengths',
                  Required=False, Default=False)
    ])

    def run(self, **kwargs):
        """Count the sequences and find the shortest and longest lengths.

        kwargs must contain:
            seqs: iterable of (sequence id, sequence) pairs; it is
                consumed exactly once, so a generator is acceptable.
            suppress_length_summary: when true, the length statistics
                are reported as None.

        Returns a dict with the keys 'num-seqs', 'min-length' and
        'max-length'. Both length values are None when the length
        summary is suppressed or when seqs contains no sequences.
        """
        num_seqs = 0
        min_length = None
        max_length = None

        # Track running extremes instead of collecting every length, so
        # memory use stays constant for large inputs and an empty input
        # does not make min()/max() raise ValueError.
        for _seq_id, seq in kwargs['seqs']:
            num_seqs += 1
            seq_length = len(seq)
            if min_length is None or seq_length < min_length:
                min_length = seq_length
            if max_length is None or seq_length > max_length:
                max_length = seq_length

        if kwargs['suppress_length_summary']:
            min_length = None
            max_length = None

        return {'num-seqs': num_seqs,
                'min-length': min_length,
                'max-length': max_length}


# Module-level alias; the interface module in this gist imports the
# command under this name.
CommandConstructor = SequenceCollectionSummarizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from pyqi.core.interfaces.optparse import (OptparseUsageExample, | |
OptparseOption, OptparseResult) | |
from pyqi.core.command import make_parameter_collection_lookup_f | |
from sequence_collection_summarizer import CommandConstructor | |
from pyqi.core.exception import IncompetentDeveloperError | |
import os | |
# Lookup helper built from the command; it is called below with a
# parameter name (e.g. 'seqs') to supply the Parameter for an option.
param_lookup = make_parameter_collection_lookup_f(CommandConstructor)
def parse_fasta(fp):
    """Yield (sequence id, sequence) pairs from a fasta-formatted file.

    fp: path to a fasta-formatted file

    The sequence id is the header line without its leading '>'. All
    sequence lines of a record are stripped of surrounding whitespace
    and joined into a single string. Blank lines are ignored. A header
    with no sequence lines yields an empty sequence, wherever it occurs
    in the file. An empty file yields nothing. Sequence lines that
    appear before any header are yielded with an id of None.

    NO ERROR CHECKING IS PERFORMED!
    """
    # Default text mode already translates all line-break styles on
    # Python 3; the old 'U' flag was removed in Python 3.11 and raises
    # ValueError there. The with-block guarantees the file is closed
    # once the generator is exhausted or closed.
    with open(fp) as f:
        seq_id = None
        seq = []
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line.startswith('>'):
                # A new header completes the previous record, if any.
                if seq_id is not None or seq:
                    yield seq_id, ''.join(seq)
                seq_id = line[1:]
                seq = []
            else:
                seq.append(line)
        # Emit the final record; nothing is pending for an empty file.
        if seq_id is not None or seq:
            yield seq_id, ''.join(seq)
def append_datum_to_file(result_key, data, option_value=None):
    """Append one summary value to a file as a tab-separated line.

    result_key: label written in the first column
    data: value written in the second column, formatted with %d;
        when None, nothing is written and the function returns
    option_value: path of the file to append to

    Raises IncompetentDeveloperError when data is not None but
    option_value is None.
    """
    # don't do anything if data is None
    if data is None:
        return

    # If option_value is None when this output handler is called,
    # the interface developer did something wrong when defining
    # the OptparseResults. Politely alert the developer that
    # this output handler isn't associated with an option
    # (it needs to be associated with an output file path).
    if option_value is None:
        raise IncompetentDeveloperError(
            "Cannot write output without a filepath.")

    # open the output file for appending, and write the
    # summary information to a single tab-separated line
    with open(option_value, 'a') as f:
        f.write('%s\t%d\n' % (result_key, data))
# Example invocations for the command-line interface: one writing the
# full summary, one that passes --suppress-length-summary.
usage_examples = [
    OptparseUsageExample(
        ShortDesc="Summarize the input sequence collection and write the result to file.",
        LongDesc="Read the file specified by -i, and compute the number of sequences in the file, and the minimum and maximum sequence lengths. Write all of that information to path specified by -o.",
        Ex="%prog -i seqs.fna -o seqs.summary.txt"),
    OptparseUsageExample(
        ShortDesc="Summarize the input sequence collection and write the result to file, excluding information on sequence lengths.",
        LongDesc="Read the file specified by -i, compute the number of sequences in the file, and write that information to path specified by -o.",
        Ex="%prog -i seqs.fna -o seqs.summary.txt --suppress-length-summary")
]
# Command-line options accepted by the interface.
inputs = [
    # -i: an existing file path; parse_fasta is the input handler that
    # turns it into the 'seqs' parameter value.
    OptparseOption(Parameter=param_lookup('seqs'),
                   InputType='existing_filepath',
                   InputAction='store',
                   InputHandler=parse_fasta,
                   ShortName='i'),
    # Boolean flag for 'suppress_length_summary'; it has no short name
    # and no input handler.
    OptparseOption(Parameter=param_lookup('suppress_length_summary'),
                   InputType=None,
                   InputAction='store_true',
                   InputHandler=None,
                   ShortName=None),
    # -o / output-fp: not tied to a command Parameter (Parameter=None).
    # The entries in `outputs` refer to it by name as the file to write.
    OptparseOption(Parameter=None,
                   InputType='new_filepath',
                   InputAction='store',
                   ShortName='o',
                   Name='output-fp',
                   Required=True,
                   Help='path where output should be written')
]
# One result per key returned by the command's run(); each uses
# append_datum_to_file as its handler and names the 'output-fp' option.
outputs = [
    OptparseResult(ResultKey='num-seqs',
                   OutputHandler=append_datum_to_file,
                   OptionName='output-fp'),
    OptparseResult(ResultKey='min-length',
                   OutputHandler=append_datum_to_file,
                   OptionName='output-fp'),
    OptparseResult(ResultKey='max-length',
                   OutputHandler=append_datum_to_file,
                   OptionName='output-fp'),
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment