Created
September 14, 2017 08:08
-
-
Save walterst/7500b638d063cc29c5e28beab2eaf0f5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Used to count fastq seqs in gzipped files, write counts and file name to log file
# Usage: python count_zipped_fastq_reads.py fastq_folder log_file
# where fastq_folder has all of the fastq files in it (doesn't search subdirectories)
from glob import glob
from sys import argv

from cogent.parse.fastq import MinimalFastqParser
from qiime.util import gzip_open
# Field positions for header, sequence and quality -- presumably indices into
# each record yielded by MinimalFastqParser (TODO confirm). They are not used
# by the counting loop below and are kept only as reference constants.
header_index = 0
sequence_index = 1
quality_index = 2

# Fail with a usage message rather than a bare IndexError on missing args.
if len(argv) < 3:
    raise SystemExit(
        "Usage: python count_zipped_fastq_reads.py fastq_folder log_file")

# Only "*.gz" entries directly inside the folder are matched (no recursion),
# so every path is gzipped and is opened with gzip_open. Sorting makes the
# log order reproducible, because glob() returns paths in arbitrary order.
fastq_files = sorted(glob(argv[1] + "/*.gz"))

# Writes one "<path>\t<read count>" line per input file. The with-block
# guarantees the log is flushed and closed even if a file fails to parse.
with open(argv[2], "w") as output_log:
    for curr_file in fastq_files:
        query_reads = gzip_open(curr_file)
        # try/finally instead of `with`: the object returned by gzip_open is
        # not known to support the context-manager protocol.
        try:
            # Count the records the parser yields; their content is ignored.
            curr_counts = sum(
                1 for _ in MinimalFastqParser(query_reads, strict=False))
        finally:
            query_reads.close()
        output_log.write("%s\t%d\n" % (curr_file, curr_counts))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment