walterst · November 16, 2017 22:28
diff --git a/filter_barcode_header.py b/filter_barcode_header.py
 #!/usr/bin/env python


 # Usage:  python filter_barcode_header.py original_barcode_seqs.fastq new_barcode_seqs.fastq
 # WARNING-the second file specified will be overwritten if it exists!

 # Marker that precedes the barcode in each fastq header; everything after its
 # first occurrence is treated as the barcode.
 bc_start_indicator = "1:N:0:"
 # Characters removed from the barcode portion of the header.
 chars_to_strip = ["+"]

 from sys import argv

 from cogent.parse.fastq import MinimalFastqParser
 from qiime.util import gzip_open

 # Positions of the fields in each record yielded by MinimalFastqParser.
 header_index = 0
 sequence_index = 1
 quality_index = 2

 # Fail with a readable usage message instead of an IndexError on argv.
 if len(argv) != 3:
     raise SystemExit(
         "Usage: python filter_barcode_header.py "
         "original_barcode_seqs.fastq new_barcode_seqs.fastq")

 # Paths ending in .gz go through qiime's gzip_open; anything else is opened
 # as text in universal-newline ("U") mode.
 if argv[1].endswith('.gz'):
     query_reads = gzip_open(argv[1])
 else:
     query_reads = open(argv[1], "U")

 # try/finally (rather than "with") so both handles are closed even when the
 # object returned by gzip_open is not a context manager.
 try:
     output_fastq = open(argv[2], "w")
     try:
         for read_data in MinimalFastqParser(query_reads, strict=False):
             header = read_data[header_index]
             # partition keeps everything after the FIRST indicator, so a
             # header that happens to repeat the indicator is not truncated.
             prefix, marker, curr_bc = header.partition(bc_start_indicator)
             if not marker:
                 raise ValueError(
                     "Header %r does not contain the barcode indicator %r"
                     % (header, bc_start_indicator))
             for char_to_strip in chars_to_strip:
                 curr_bc = curr_bc.replace(char_to_strip, "")
             # Re-emit the record as four fastq lines with the cleaned header.
             output_fastq.write("@%s%s%s\n%s\n+\n%s\n" % (
                 prefix, marker, curr_bc,
                 read_data[sequence_index],
                 read_data[quality_index]))
     finally:
         output_fastq.close()
 finally:
     query_reads.close()
	#!/usr/bin/env python


	# Usage: python filter_barcode_header.py original_barcode_seqs.fastq new_barcode_seqs.fastq
	# WARNING-the second file specified will be overwritten if it exists!

	# Marker that precedes the barcode in each fastq header; everything after its
	# first occurrence is treated as the barcode.
	bc_start_indicator = "1:N:0:"
	# Characters removed from the barcode portion of the header.
	chars_to_strip = ["+"]

	from sys import argv

	from cogent.parse.fastq import MinimalFastqParser
	from qiime.util import gzip_open

	# Positions of the fields in each record yielded by MinimalFastqParser.
	header_index = 0
	sequence_index = 1
	quality_index = 2

	# Fail with a readable usage message instead of an IndexError on argv.
	if len(argv) != 3:
	    raise SystemExit(
	        "Usage: python filter_barcode_header.py "
	        "original_barcode_seqs.fastq new_barcode_seqs.fastq")

	# Paths ending in .gz go through qiime's gzip_open; anything else is opened
	# as text in universal-newline ("U") mode.
	if argv[1].endswith('.gz'):
	    query_reads = gzip_open(argv[1])
	else:
	    query_reads = open(argv[1], "U")

	# try/finally (rather than "with") so both handles are closed even when the
	# object returned by gzip_open is not a context manager.
	try:
	    output_fastq = open(argv[2], "w")
	    try:
	        for read_data in MinimalFastqParser(query_reads, strict=False):
	            header = read_data[header_index]
	            # partition keeps everything after the FIRST indicator, so a
	            # header that happens to repeat the indicator is not truncated.
	            prefix, marker, curr_bc = header.partition(bc_start_indicator)
	            if not marker:
	                raise ValueError(
	                    "Header %r does not contain the barcode indicator %r"
	                    % (header, bc_start_indicator))
	            for char_to_strip in chars_to_strip:
	                curr_bc = curr_bc.replace(char_to_strip, "")
	            # Re-emit the record as four fastq lines with the cleaned header.
	            output_fastq.write("@%s%s%s\n%s\n+\n%s\n" % (
	                prefix, marker, curr_bc,
	                read_data[sequence_index],
	                read_data[quality_index]))
	    finally:
	        output_fastq.close()
	finally:
	    query_reads.close()
No results found