Last active
March 6, 2022 00:48
-
-
Save apcamargo/f56288b1d4892716deb6f833ce6a4932 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bz2
import gzip
import lzma
from contextlib import contextmanager
from enum import Enum, auto
from pathlib import Path
from typing import Iterator, TextIO
class Compression(Enum):
    """Compression formats that a file on disk can be classified as."""

    # The numeric values carry no meaning of their own; they are the same
    # 1-based sequence that ``auto()`` would assign in this order.
    bzip2 = 1
    gzip = 2
    xz = 3
    # Catch-all member for files matching none of the formats above.
    uncompressed = 4
def is_compressed(filepath: Path) -> Compression:
    """Identify the compression format of a file from its leading bytes.

    Despite the boolean-sounding name, the result is a ``Compression``
    member; ``Compression.uncompressed`` is returned when none of the
    known signatures match.

    Args:
        filepath: Path of the file to inspect. It is opened in binary mode.

    Returns:
        The ``Compression`` member whose magic number prefixes the file.
    """
    # Magic numbers, checked in the same order as the formats are listed.
    magic_numbers = (
        (b"\x1f\x8b", Compression.gzip),
        (b"BZh", Compression.bzip2),  # bytes 0x42 0x5A 0x68
        (b"\xfd7zXZ\x00\x00", Compression.xz),  # bytes FD 37 7A 58 5A 00 00
    )
    with open(filepath, "rb") as handle:
        header = handle.peek(8)[:8]
    for magic, kind in magic_numbers:
        # A header shorter than the magic number can never match.
        if header.startswith(magic):
            return kind
    return Compression.uncompressed
@contextmanager
def open_file(filepath: Path) -> Iterator[TextIO]:
    """Open a possibly compressed file for reading as text.

    The compression format is detected with ``is_compressed`` and the
    matching opener (``gzip``, ``bz2``, ``lzma`` or the builtin ``open``)
    is used. Intended for use in a ``with`` statement; the handle is
    closed when the block exits, including when it exits by exception.

    Args:
        filepath: Path of the file to open.

    Yields:
        A text-mode file object positioned at the start of the
        (decompressed) content.
    """
    openers = {
        Compression.gzip: gzip.open,
        Compression.bzip2: bz2.open,
        Compression.xz: lzma.open,
    }
    # Anything without a dedicated opener is read as a plain text file;
    # for the builtin ``open`` the mode "rt" is the same as "r".
    opener = openers.get(is_compressed(filepath), open)
    handle = opener(filepath, "rt")
    try:
        yield handle
    finally:
        handle.close()
def read_file(filepath: Path, skip_header: bool = False) -> Iterator[str]:
    """Lazily yield the lines of a possibly compressed text file.

    Args:
        filepath: Path of the file to read; it is opened with ``open_file``.
        skip_header: When true, the first line is discarded before
            yielding. An empty file simply yields nothing.

    Yields:
        Each remaining line of the file, as produced by iterating the
        underlying text file object.
    """
    with open_file(filepath) as f:
        if skip_header:
            # Use a default so an empty file does not raise StopIteration
            # here; inside a generator that would surface to the caller as
            # RuntimeError (PEP 479).
            next(f, None)
        yield from f
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment