Last active
August 12, 2025 13:44
-
-
Save ArthurDelannoyazerty/4148dd134baff553eea099f0a9b8ce0c to your computer and use it in GitHub Desktop.
Functions to scan the files of a zip archive and load a selected image, extract a selected file to a folder, or read a selected file's content.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""from . import ZipUtils""" | |
import shutil
import zipfile
from pathlib import Path
from typing import Callable, Union

import numpy as np
from PIL import Image
__all__ = ["ZipUtils"] | |
class ZipUtils:
    """A utility class for working with zip files.

    Every public method scans the archive in its stored order and acts on the
    first *file* entry (directory entries are skipped) whose name satisfies a
    caller-supplied predicate.
    """

    def __init__(self):
        """This class is not meant to be instantiated."""
        raise NotImplementedError("ZipUtils is a utility class and cannot be instantiated.")

    @staticmethod
    def _find_first_match(
        archive: zipfile.ZipFile,
        predicate: Callable[[str], bool],
        zip_filepath: Path,
    ) -> zipfile.ZipInfo:
        """
        Return the first non-directory entry of the archive accepted by the predicate.

        Args:
            archive (zipfile.ZipFile): An already opened archive.
            predicate (Callable[[str], bool]): A function that takes the entry name
                (e.g., 'folder/file.txt') and returns True for the desired file.
            zip_filepath (Path): The archive path, only used in the error message.

        Returns:
            zipfile.ZipInfo: The metadata of the first matching file entry.

        Raises:
            FileNotFoundError: If no file matching the predicate is found in the archive.
        """
        for file_info in archive.infolist():
            # Directory entries cannot be opened as images, extracted as a single
            # file or decoded as text, so they are never handed to the predicate.
            if file_info.is_dir():
                continue
            if predicate(file_info.filename):
                return file_info
        raise FileNotFoundError(f"No file matching the specified condition was found in '{zip_filepath}'")

    @staticmethod
    def load_image_from_zip(zip_filepath: Path, predicate: Callable[[str], bool]) -> np.ndarray:
        """
        Loads the first image from a zip archive that matches a given condition.

        Args:
            zip_filepath (Path): The path to the zip file.
            predicate (Callable[[str], bool]): A function that takes a filename
                (string) as input and returns True if it's the desired file,
                otherwise False.

        Returns:
            np.ndarray: A NumPy array containing the image data.

        Raises:
            FileNotFoundError: If no file matching the predicate is found in the archive.
        """
        with zipfile.ZipFile(zip_filepath, mode="r") as archive:
            file_info = ZipUtils._find_first_match(archive, predicate, zip_filepath)
            with archive.open(file_info) as f:
                with Image.open(f) as image:
                    # Build the array while the image and the archive member are still open.
                    return np.array(image)

    @staticmethod
    def extract_file_from_zip(zip_filepath: Path, local_folderpath: Path, predicate: Callable[[str], bool]) -> Path:
        """
        Extracts the first file from a zip archive that matches a given condition.

        The file is written directly into ``local_folderpath`` under its base name;
        the folder structure of the archive is not reproduced. An existing file with
        the same name is overwritten, and nothing else in ``local_folderpath`` is
        touched.

        Args:
            zip_filepath (Path): The path to the zip file.
            local_folderpath (Path): The folder where the file will be saved. It is
                created if it does not exist.
            predicate (Callable[[str], bool]): A function that takes a filename
                (string) as input and returns True if it's the desired file.

        Returns:
            Path: The path to the extracted file.

        Raises:
            FileNotFoundError: If no file matching the predicate is found in the archive.
        """
        with zipfile.ZipFile(zip_filepath, mode="r") as archive:
            file_info = ZipUtils._find_first_match(archive, predicate, zip_filepath)

            destination_folder = Path(local_folderpath)
            destination_folder.mkdir(parents=True, exist_ok=True)
            filepath = destination_folder / Path(file_info.filename).name

            # Stream the member straight to its final location. Extracting the
            # archive tree and deleting it afterwards breaks for files stored at
            # the archive root and can delete pre-existing folders of the same name.
            with archive.open(file_info) as source, open(filepath, "wb") as target:
                shutil.copyfileobj(source, target)
            return filepath  # only one element from the archive

    @staticmethod
    def read_text_from_zip(
        zip_filepath: Path,
        predicate: Callable[[str], bool],
        byte: bool = False,
        encoding: str = 'utf-8',
    ) -> Union[str, bytes]:
        """
        Reads the content of the first file from a zip archive that matches a given condition.

        Args:
            zip_filepath (Path): The path to the zip file.
            predicate (Callable[[str], bool]): A function that takes a filename
                (e.g., 'folder/file.txt') as input and returns True if it's the desired file.
            byte (bool): If True, returns the file content as bytes. If False, returns as a string.
            encoding (str): The text encoding to use for decoding the file bytes.
                Ignored when ``byte`` is True. Defaults to 'utf-8'.

        Returns:
            Union[str, bytes]: The raw bytes of the found file when ``byte`` is True,
            otherwise its content decoded with ``encoding``.

        Raises:
            FileNotFoundError: If the zip file does not exist, or if no file matching
                the predicate is found in the archive.
            UnicodeDecodeError: If the file cannot be decoded with the specified encoding.
        """
        if not zip_filepath.exists():
            raise FileNotFoundError(f"The specified zip file was not found: '{zip_filepath}'")

        with zipfile.ZipFile(zip_filepath, mode="r") as archive:
            file_info = ZipUtils._find_first_match(archive, predicate, zip_filepath)
            file_bytes = archive.read(file_info)

        if byte:
            return file_bytes
        return file_bytes.decode(encoding)
if __name__ == '__main__':
    # Demo: pick the first PNG of "example.zip", first extracting it into
    # "data/" and then loading it straight from the archive as an array.
    def tci_predicate(filename: str):
        """Return True for archive entries whose name ends with '.png'."""
        return filename.endswith('.png')

    example_zip = Path("example.zip")
    image = ZipUtils.extract_file_from_zip(example_zip, Path("data/"), tci_predicate)
    image = ZipUtils.load_image_from_zip(example_zip, tci_predicate)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment