Last active
July 20, 2016 18:04
-
-
Save dmyersturnbull/24d21182ce589d776a824fe47297eaae to your computer and use it in GitHub Desktop.
Lazily read a text file, gunzip based on filename extension, and return newline-stripped lines.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import gzip, io | |
| from typing import Iterator | |
def lines(file_name: str, known_encoding: str = 'utf-8') -> Iterator[str]:
    r"""Lazily yield the lines of a text file, transparently gunzipping if needed.

    The file is opened only when iteration starts and is closed once the
    generator is exhausted or closed. Each yielded line has any trailing
    newline characters (\n or \r) removed; other whitespace is preserved.

    If the file name ends with '.gz' or '.gzip', the file is assumed to be
    gzip-compressed.

    Arguments:
        file_name: Path of the file to read.
        known_encoding: Applied only when decoding gzip. Plain files are
            decoded with the platform default encoding used by ``open``.

    Yields:
        Each line of the file, without its line terminator.
    """
    if file_name.endswith(('.gz', '.gzip')):
        # Text mode ('rt') makes gzip.open wrap the decompressed byte stream
        # in a TextIOWrapper, so no manual wrapping is required.
        source = gzip.open(file_name, 'rt', encoding=known_encoding)
    else:
        # Deliberately no explicit encoding: known_encoding is documented as
        # gzip-only, so plain files keep the platform default.
        source = open(file_name, 'r')
    with source as f:
        for line in f:
            yield line.rstrip('\n\r')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment