Skip to content

Instantly share code, notes, and snippets.

@walterst
Created September 14, 2017 08:08
Show Gist options
  • Save walterst/7500b638d063cc29c5e28beab2eaf0f5 to your computer and use it in GitHub Desktop.
Save walterst/7500b638d063cc29c5e28beab2eaf0f5 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Used to count fastq seqs in gzipped files, write counts and file name to log file
# Usage: python count_zipped_fastq_reads.py fastq_folder log_file
# where fastq_folder directly contains the gzipped fastq files (only *.gz files
# are counted; subdirectories are not searched)
from sys import argv
from glob import glob
from cogent.parse.fastq import MinimalFastqParser
from qiime.util import gzip_open
# Presumably the positions of the header, sequence and quality fields in a
# parsed fastq record (TODO confirm). Nothing below reads them; they are kept
# so the module's names are unchanged.
header_index = 0
sequence_index = 1
quality_index = 2

# Fail with a usage message instead of a bare IndexError when arguments are
# missing. Extra arguments are still ignored, as before.
if len(argv) < 3:
    raise SystemExit(
        "Usage: python count_zipped_fastq_reads.py fastq_folder log_file")

# Only *.gz files directly inside the folder are matched.
fastq_files = glob(argv[1] + "/*.gz")

# The with-block closes (and therefore flushes) the log even if one of the
# input files fails to open or parse part-way through the run.
with open(argv[2], "w") as output_log:
    for curr_file in fastq_files:
        # The glob above only yields *.gz paths; the plain-text branch is kept
        # so the loop keeps working if the pattern is ever widened.
        if curr_file.endswith('.gz'):
            query_reads = gzip_open(curr_file)
        else:
            query_reads = open(curr_file, "U")
        # try/finally rather than `with`: the handle returned by gzip_open is
        # not guaranteed to support the context-manager protocol.
        try:
            # One parsed record per read; only the number of records matters.
            curr_counts = sum(
                1 for _ in MinimalFastqParser(query_reads, strict=False))
        finally:
            query_reads.close()
        # One tab-separated "path<TAB>count" line per input file.
        output_log.write("%s\t%d\n" % (curr_file, curr_counts))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment