walterst · September 14, 2017 08:08
diff --git a/count_zipped_fastq_reads.py b/count_zipped_fastq_reads.py
 #!/usr/bin/env python

 # Used to count fastq seqs in gzipped files, write counts and file name to log file
 # Usage:  python count_zipped_fastq_reads.py fastq_folder log_file
 # where fastq_folder has all of the fastq files in it (doesn't search subdirectories)

 from sys import argv
 from glob import glob

 from cogent.parse.fastq import MinimalFastqParser
 from qiime.util import gzip_open

 # Not referenced anywhere in the visible script; presumably the positions of
 # the label, sequence and quality fields in a parsed fastq record.
 header_index = 0
 sequence_index = 1
 quality_index = 2


 # Collect the gzipped files directly inside the folder given as the first
 # argument. Sorted so the log lists files in a stable order between runs
 # (glob itself returns matches in arbitrary order).
 fastq_files = sorted(glob(argv[1] + "/*.gz"))

 # The 'with' block guarantees the log is flushed and closed even when one of
 # the input files fails to open or parse.
 with open(argv[2], "w") as output_log:

     for curr_file in fastq_files:

         # The glob pattern above only matches *.gz, so the plain-text branch
         # is a fallback that only matters if the pattern is ever widened.
         if curr_file.endswith('.gz'):
             query_reads = gzip_open(curr_file)
         else:
             query_reads = open(curr_file, "U")

         # Count one per parsed record; close the input handle even if the
         # parser raises part-way through the file.
         try:
             curr_counts = 0
             for read_data in MinimalFastqParser(query_reads, strict=False):
                 curr_counts += 1
         finally:
             query_reads.close()

         # One tab-separated line per input file: path, then read count.
         output_log.write("%s\t%d\n" % (curr_file, curr_counts))
	#!/usr/bin/env python

	# Used to count fastq seqs in gzipped files, write counts and file name to log file
	# Usage: python count_zipped_fastq_reads.py fastq_folder log_file
	# where fastq_folder has all of the fastq files in it (doesn't search subdirectories)

	from sys import argv
	from glob import glob

	from cogent.parse.fastq import MinimalFastqParser
	from qiime.util import gzip_open

	# Not referenced anywhere in the visible script; presumably the positions of
	# the label, sequence and quality fields in a parsed fastq record.
	header_index = 0
	sequence_index = 1
	quality_index = 2


	# Collect the gzipped files directly inside the folder given as the first
	# argument. Sorted so the log lists files in a stable order between runs
	# (glob itself returns matches in arbitrary order).
	fastq_files = sorted(glob(argv[1] + "/*.gz"))

	# The 'with' block guarantees the log is flushed and closed even when one of
	# the input files fails to open or parse.
	with open(argv[2], "w") as output_log:

	    for curr_file in fastq_files:

	        # The glob pattern above only matches *.gz, so the plain-text
	        # branch is a fallback that only matters if the pattern is widened.
	        if curr_file.endswith('.gz'):
	            query_reads = gzip_open(curr_file)
	        else:
	            query_reads = open(curr_file, "U")

	        # Count one per parsed record; close the input handle even if the
	        # parser raises part-way through the file.
	        try:
	            curr_counts = 0
	            for read_data in MinimalFastqParser(query_reads, strict=False):
	                curr_counts += 1
	        finally:
	            query_reads.close()

	        # One tab-separated line per input file: path, then read count.
	        output_log.write("%s\t%d\n" % (curr_file, curr_counts))