Skip to content

Instantly share code, notes, and snippets.

@wdecoster
Created October 21, 2017 11:59
Show Gist options
  • Select an option

  • Save wdecoster/66bfed93858bbfbabf4d174f250cd496 to your computer and use it in GitHub Desktop.

Select an option

Save wdecoster/66bfed93858bbfbabf4d174f250cd496 to your computer and use it in GitHub Desktop.
import concurrent.futures as cfutures
import nanomath
from Bio import SeqIO
def stream_fastq_full(fastq, threads):
    """Lazily yield per-read metrics for every record of a fastq file.

    Each record parsed from ``fastq`` is handed to a pool of at most
    ``threads`` worker processes running ``extract_all_from_fastq``.
    Worker results are yielded unchanged and in input order: a tuple of
    (read name, read length, average quality, median quality), or ``None``
    for a read the worker decided to skip.
    """
    with cfutures.ProcessPoolExecutor(max_workers=threads) as pool:
        records = SeqIO.parse(fastq, "fastq")
        # Delegate straight to the pool's result iterator; the caller is
        # responsible for filtering out the ``None`` entries.
        yield from pool.map(extract_all_from_fastq, records)
def extract_all_from_fastq(rec):
    """Compute the metrics of a single fastq record (worker function).

    Returns a tuple ``(read id, read length, average quality, median
    quality)`` built from the record's "phred_quality" letter annotations.

    If computing the metrics raises ZeroDivisionError (per the original
    note, this happens in ``nanomath`` for reads of length 0), the read is
    skipped and ``None`` is returned instead.
    """
    try:
        read_id = rec.id
        read_length = len(rec)
        # Look the quality list up once and reuse it for both statistics.
        phred_scores = rec.letter_annotations["phred_quality"]
        mean_quality = nanomath.ave_qual(phred_scores)
        median_quality = nanomath.median_qual(phred_scores)
    except ZeroDivisionError:
        return None
    return (read_id, read_length, mean_quality, median_quality)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment