|
#!/usr/bin/env python |
|
# File created on 01 Jul 2013 |
|
from __future__ import division |
|
|
|
__author__ = "Greg Caporaso" |
|
__copyright__ = "Copyright 2013, The QIIME project" |
|
__credits__ = ["Greg Caporaso"] |
|
__license__ = "GPL" |
|
__version__ = "0.0.0" |
|
__maintainer__ = "Greg Caporaso" |
|
__email__ = "[email protected]" |
|
__status__ = "Development" |
|
|
|
from os.path import join, split, splitext |
|
from qiime.util import (parse_command_line_parameters, |
|
make_option) |
|
from cogent import DNA |
|
from cogent.parse.fastq import MinimalFastqParser |
|
from qiime.util import qiime_open, create_dir |
|
from qiime.parse import parse_mapping_file_to_dict |
|
|
|
|
|
script_info = {} |
|
script_info['brief_description'] = "Prepare fastq reads to be run through split_libraries_fastq.py." |
|
script_info['script_description'] = "" |
|
script_info['script_usage'] = [] |
|
script_info['script_usage'].append(("","","")) |
|
script_info['output_description']= "" |
|
script_info['required_options'] = [ |
|
make_option('-b','--barcoded_read_fp', |
|
type="existing_filepath", |
|
help='path to fastq file containing the non-barcoded reads'), |
|
make_option('-m','--mapping_fp', |
|
type="existing_filepath", |
|
help='path to metadata mapping file'), |
|
make_option('-o','--output_dir', |
|
type="new_dirpath", |
|
help='the output directory'), |
|
] |
|
script_info['optional_options'] = [ |
|
] |
|
script_info['version'] = __version__ |
|
|
|
def compute_barcode_length(barcode_map): |
|
barcode_lengths = [len(bc) for bc in barcode_map] |
|
barcode_length = barcode_lengths[0] |
|
assert len(set(barcode_lengths)) == 1,\ |
|
"All barcodes must be the same length." |
|
return barcode_length |
|
|
|
def strip_barcode_and_linker_primer(fastq, |
|
barcode_map): |
|
""" |
|
strip barcodes & linker-primer seqs; return barcode & amplicon seqs & quals |
|
|
|
fastq: open file handle contained barcoded fastq data |
|
barcode_map: dict mapping barcodes to the length of the associated |
|
linker+primer sequences |
|
|
|
yields header, barcode_read, barcode_qual, amplicon_read, amplicon_qual |
|
|
|
""" |
|
|
|
barcode_length = compute_barcode_length(barcode_map) |
|
|
|
for header, seq, qual in MinimalFastqParser(fastq,strict=False): |
|
barcode_read = seq[:barcode_length] |
|
barcode_qual = qual[:barcode_length] |
|
try: |
|
linker_primer_length = barcode_map[barcode_read] |
|
except KeyError: |
|
continue |
|
barcode_linker_primer_length = barcode_length + linker_primer_length |
|
amplicon_read = seq[barcode_linker_primer_length:] |
|
amplicon_qual = qual[barcode_linker_primer_length:] |
|
yield header, barcode_read, barcode_qual, amplicon_read, amplicon_qual |
|
|
|
|
|
def main(): |
|
option_parser, opts, args =\ |
|
parse_command_line_parameters(**script_info) |
|
|
|
fastq = qiime_open(opts.barcoded_read_fp) |
|
mapping_data, comments = parse_mapping_file_to_dict(qiime_open(opts.mapping_fp)) |
|
barcode_map = {} |
|
for k, v in mapping_data.items(): |
|
barcode_map[v['BarcodeSequence']] = len(v['LinkerPrimerSequence']) |
|
|
|
input_basename = split(splitext(opts.barcoded_read_fp)[0])[1] |
|
output_amplicon_fn = '%s.amplicon.fastq' % input_basename |
|
output_barcode_fn = '%s.barcode.fastq' % input_basename |
|
output_amplicon_fp = join(opts.output_dir,output_amplicon_fn) |
|
output_barcode_fp = join(opts.output_dir,output_barcode_fn) |
|
|
|
create_dir(opts.output_dir) |
|
output_amplicon_f = open(output_amplicon_fp,'w') |
|
output_barcode_f = open(output_barcode_fp,'w') |
|
|
|
for header, barcode_read, barcode_qual, amplicon_read, amplicon_qual in \ |
|
strip_barcode_and_linker_primer(fastq,barcode_map): |
|
output_barcode_f.write( |
|
'@%s\n%s\n+\n%s\n' % (header, barcode_read, barcode_qual)) |
|
output_amplicon_f.write( |
|
'@%s\n%s\n+\n%s\n' % (header, amplicon_read, amplicon_qual)) |
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
main() |