Last active
December 14, 2015 15:49
-
-
Save gregcaporaso/5110725 to your computer and use it in GitHub Desktop.
A very quick-and-dirty, untested script to see if it'll be possible to analyze miRNA sequence data in [QIIME](www.qiime.org). Just need this code to perform some initial sequence processing. THIS SCRIPT IS UNTESTED - DO NOT USE FOR ANYTHING IMPORTANT!!!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# File created on 07 Mar 2013 | |
from __future__ import division | |
__author__ = "Greg Caporaso" | |
__copyright__ = "Copyright 2011, The QIIME project" | |
__credits__ = ["Greg Caporaso"] | |
__license__ = "GPL" | |
__version__ = "1.6.0-dev" | |
__maintainer__ = "Greg Caporaso" | |
__email__ = "[email protected]" | |
__status__ = "Development" | |
from cogent.parse.fastq import MinimalFastqParser | |
from qiime.util import parse_command_line_parameters, make_option, create_dir | |
script_info = {} | |
script_info['brief_description'] = "" | |
script_info['script_description'] = "" | |
script_info['script_usage'] = [\ | |
("","","%prog -i sample1_input.fastq -o out/ -s sample1 -5 TGGAATTCTCGGGTGCCAAGG")] | |
script_info['output_description']= "" | |
script_info['required_options'] = [ | |
make_option('-i','--input_fp',type="existing_filepath",help='the input filepath'), | |
make_option('-s','--sample_id',help='the sample id'), | |
make_option('-o','--output_dir',help='the output_directory'), | |
] | |
script_info['optional_options'] = [ | |
make_option('-5','--five_prime_adapter',help='The five prime adaptor sequence'), | |
make_option('-m', | |
'--five_prime_adapter_max_mismatches', | |
default=1, | |
type='int', | |
help='The five prime adaptor sequence'), | |
] | |
script_info['version'] = __version__ | |
def trim_mirna_adapters(input_fastq, | |
sample_id, | |
output_f, | |
five_prime_adapter, | |
five_prime_adapter_max_mismatches): | |
""" """ | |
if five_prime_adapter == None: | |
def retain_seq(seq): | |
return True, seq | |
else: | |
five_prime_adapter_len = len(five_prime_adapter) | |
def retain_seq(seq): | |
count_diffs = 0 | |
for a,e in zip(seq[:five_prime_adapter_len],five_prime_adapter): | |
if a != e: | |
count_diffs += 1 | |
return count_diffs <= five_prime_adapter_max_mismatches, seq[five_prime_adapter_len:] | |
i = 0 | |
for seq_id, seq, qual in input_fastq: | |
acceptable_adapter, trimmed_seq = retain_seq(seq) | |
if acceptable_adapter: | |
output_f.write('>%s_%d %s\n%s\n' % (sample_id,i,seq_id,trimmed_seq)) | |
i += 1 | |
else: | |
pass | |
def main(): | |
option_parser, opts, args =\ | |
parse_command_line_parameters(**script_info) | |
create_dir(opts.output_dir) | |
trim_mirna_adapters(MinimalFastqParser(open(opts.input_fp,'U'),strict=False), | |
opts.sample_id, | |
open('%s/%s.fna' % (opts.output_dir,opts.sample_id),'w'), | |
opts.five_prime_adapter, | |
opts.five_prime_adapter_max_mismatches) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment