Created
October 21, 2017 11:59
-
-
Save wdecoster/66bfed93858bbfbabf4d174f250cd496 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import concurrent.futures as cfutures | |
| import nanomath | |
| from Bio import SeqIO | |
def stream_fastq_full(fastq, threads):
    """Stream per-read metrics from a fastq file using a pool of worker processes.

    Every record parsed from ``fastq`` is handed to ``extract_all_from_fastq``
    and the results are yielded in the same order as the records appear in
    the input.

    Args:
        fastq: File name or handle, passed straight to ``SeqIO.parse``.
        threads: Maximum number of worker processes in the pool.

    Yields:
        One tuple per read: (read name, read length, average quality,
        median quality).  Reads for which the worker returns ``None``
        (zero-length reads) are skipped instead of being passed on.
    """
    with cfutures.ProcessPoolExecutor(max_workers=threads) as executor:
        for results in executor.map(extract_all_from_fastq, SeqIO.parse(fastq, "fastq")):
            # The worker signals "skip this read" by returning None; filter it
            # here so consumers only ever see complete metric tuples.
            if results is not None:
                yield results
def extract_all_from_fastq(rec):
    """Collect the metrics of a single fastq record.

    Worker function meant to be run on one record at a time.

    Args:
        rec: A fastq record exposing ``id``, ``len()`` and
            ``letter_annotations["phred_quality"]``.

    Returns:
        A tuple (read name, read length, average quality, median quality),
        or ``None`` when computing the qualities raises ZeroDivisionError.
        According to the original author's note this happens in
        ``nanomath.ave_qual`` for reads of length 0, and skipping such a
        read is acceptable.
    """
    try:
        read_name = rec.id
        read_length = len(rec)
        phred_scores = rec.letter_annotations["phred_quality"]
        average_quality = nanomath.ave_qual(phred_scores)
        median_quality = nanomath.median_qual(phred_scores)
    except ZeroDivisionError:
        # Zero-length read: there is nothing to report for it.
        return None
    return (read_name, read_length, average_quality, median_quality)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment