Skip to content

Instantly share code, notes, and snippets.

@gregcaporaso
Last active December 14, 2015 15:49
Show Gist options
  • Save gregcaporaso/5110725 to your computer and use it in GitHub Desktop.
Save gregcaporaso/5110725 to your computer and use it in GitHub Desktop.
A very quick-and-dirty, untested script to see if it'll be possible to analyze miRNA sequence data in [QIIME](www.qiime.org). Just need this code to perform some initial sequence processing. THIS SCRIPT IS UNTESTED - DO NOT USE FOR ANYTHING IMPORTANT!!!
#!/usr/bin/env python
# File created on 07 Mar 2013
from __future__ import division
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.6.0-dev"
__maintainer__ = "Greg Caporaso"
__email__ = "[email protected]"
__status__ = "Development"
from cogent.parse.fastq import MinimalFastqParser
from qiime.util import parse_command_line_parameters, make_option, create_dir
script_info = {}
script_info['brief_description'] = ""
script_info['script_description'] = ""
script_info['script_usage'] = [\
("","","%prog -i sample1_input.fastq -o out/ -s sample1 -5 TGGAATTCTCGGGTGCCAAGG")]
script_info['output_description']= ""
script_info['required_options'] = [
make_option('-i','--input_fp',type="existing_filepath",help='the input filepath'),
make_option('-s','--sample_id',help='the sample id'),
make_option('-o','--output_dir',help='the output_directory'),
]
script_info['optional_options'] = [
make_option('-5','--five_prime_adapter',help='The five prime adaptor sequence'),
make_option('-m',
'--five_prime_adapter_max_mismatches',
default=1,
type='int',
help='The five prime adaptor sequence'),
]
script_info['version'] = __version__
def trim_mirna_adapters(input_fastq,
sample_id,
output_f,
five_prime_adapter,
five_prime_adapter_max_mismatches):
""" """
if five_prime_adapter == None:
def retain_seq(seq):
return True, seq
else:
five_prime_adapter_len = len(five_prime_adapter)
def retain_seq(seq):
count_diffs = 0
for a,e in zip(seq[:five_prime_adapter_len],five_prime_adapter):
if a != e:
count_diffs += 1
return count_diffs <= five_prime_adapter_max_mismatches, seq[five_prime_adapter_len:]
i = 0
for seq_id, seq, qual in input_fastq:
acceptable_adapter, trimmed_seq = retain_seq(seq)
if acceptable_adapter:
output_f.write('>%s_%d %s\n%s\n' % (sample_id,i,seq_id,trimmed_seq))
i += 1
else:
pass
def main():
option_parser, opts, args =\
parse_command_line_parameters(**script_info)
create_dir(opts.output_dir)
trim_mirna_adapters(MinimalFastqParser(open(opts.input_fp,'U'),strict=False),
opts.sample_id,
open('%s/%s.fna' % (opts.output_dir,opts.sample_id),'w'),
opts.five_prime_adapter,
opts.five_prime_adapter_max_mismatches)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment