Last active
July 28, 2024 21:53
-
-
Save jszym/9860a2671dabb45424f2673a49e4b582 to your computer and use it in GitHub Desktop.
Streaming FASTA file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Copyright (C) 2024 by Joseph Szymborski (jszym.com) | |
Permission to use, copy, modify, and/or distribute this software for | |
any purpose with or without fee is hereby granted. | |
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS | |
ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL | |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO | |
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | |
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE | |
USE OR PERFORMANCE OF THIS SOFTWARE. | |
""" | |
import gzip | |
from pathlib import Path | |
from typing import Tuple, Generator | |
def stream_fasta(fasta_path: Path) -> Generator[Tuple[str, str], None, None]:
    """
    Iterates over a FASTA file, yielding a tuple of FASTA
    record name and sequence for each record.

    The file is read line by line, so only one record is held in memory
    at a time. Sequence lines belonging to the same record are concatenated
    with surrounding whitespace stripped. Records whose sequence is empty
    are skipped. Lines that appear before the first ">" header belong to no
    record and are ignored.

    :param fasta_path: Path of the FASTA file to stream. If gzip'd, must have a .gz extension.
    :return: A generator of ``(name, sequence)`` tuples, where ``name`` is the
        header line without its leading ">".
    """
    path_str = str(fasta_path)
    # gzip.open and open share the same (path, mode) call shape in text mode.
    opener = gzip.open if path_str.endswith(".gz") else open

    name = None
    chunks = []

    # The context manager closes the handle when the generator is exhausted
    # or closed early by the consumer.
    with opener(path_str, "rt") as f:
        for line in f:
            line = line.strip()
            if line.startswith(">"):
                # A new header terminates the previous record, if any.
                if name is not None and chunks:
                    yield name, "".join(chunks)
                name = line[1:]
                chunks = []
            elif name is not None and line:
                # Only non-empty lines are collected, so "chunks is non-empty"
                # is equivalent to "the sequence is non-empty".
                chunks.append(line)

    # The last record has no following header to trigger its emission.
    if name is not None and chunks:
        yield name, "".join(chunks)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment