Skip to content

Instantly share code, notes, and snippets.

@ArthurDelannoyazerty
Last active August 12, 2025 13:44
Show Gist options
  • Save ArthurDelannoyazerty/4148dd134baff553eea099f0a9b8ce0c to your computer and use it in GitHub Desktop.
Save ArthurDelannoyazerty/4148dd134baff553eea099f0a9b8ce0c to your computer and use it in GitHub Desktop.
Functions to scan the files of a zip archive and load a selected image, extract a selected file to a folder, or read a selected file's content.
"""from . import ZipUtils"""
import zipfile
from pathlib import Path
from typing import Callable
import numpy as np
from PIL import Image
import shutil
__all__ = ["ZipUtils"]
class ZipUtils:
    """A utility class for working with zip files.

    Every method scans the archive in its stored order and acts on the first
    non-directory member whose name satisfies a caller-supplied predicate.
    """

    def __init__(self):
        """This class is not meant to be instantiated."""
        raise NotImplementedError("ZipUtils is a utility class and cannot be instantiated.")

    @staticmethod
    def load_image_from_zip(zip_filepath: Path, predicate: Callable[[str], bool]) -> np.ndarray:
        """
        Loads the first image from a zip archive that matches a given condition.

        Args:
            zip_filepath (Path): The path to the zip file.
            predicate (Callable[[str], bool]): A function that takes a member
                name (string) as input and returns True if it's the desired
                file, otherwise False. Directory entries are never offered
                to it.

        Returns:
            np.ndarray: A NumPy array containing the image data.

        Raises:
            FileNotFoundError: If no file matching the predicate is found in the archive.
        """
        with zipfile.ZipFile(zip_filepath, mode="r") as archive:
            for file_info in archive.infolist():
                # Directory entries carry no data; a loose predicate must not select them.
                if file_info.is_dir() or not predicate(file_info.filename):
                    continue
                # The array is built while both handles are still open, so the
                # pixel data is fully decoded before the archive is closed.
                with archive.open(file_info) as f, Image.open(f) as image:
                    return np.array(image)
        raise FileNotFoundError(f"No file matching the specified condition was found in '{zip_filepath}'")

    @staticmethod
    def extract_file_from_zip(zip_filepath: Path, local_folderpath: Path, predicate: Callable[[str], bool]) -> Path:
        """
        Extracts the first file from a zip archive that matches a given condition.

        The file is written directly into ``local_folderpath`` under its base
        name; the folder structure stored in the archive is not reproduced.
        The archive itself is left untouched.

        Args:
            zip_filepath (Path): The path to the zip file.
            local_folderpath (Path): The folder where the file will be saved.
                It is created if it does not exist.
            predicate (Callable[[str], bool]): A function that takes a member
                name (string) as input and returns True if it's the desired
                file. Directory entries are never offered to it.

        Returns:
            Path: The path to the extracted file.

        Raises:
            FileNotFoundError: If no file matching the predicate is found in the archive.
        """
        local_folderpath = Path(local_folderpath)
        with zipfile.ZipFile(zip_filepath, mode="r") as archive:
            for file_info in archive.infolist():
                if file_info.is_dir() or not predicate(file_info.filename):
                    continue
                local_folderpath.mkdir(parents=True, exist_ok=True)
                # Only the base name is kept, so the member always lands directly
                # inside local_folderpath whatever path it has in the archive.
                filepath = local_folderpath / Path(file_info.filename).name
                # Stream the member straight to its final location. Extracting the
                # archive tree and deleting it afterwards would fail for members
                # stored at the archive root and could delete unrelated files that
                # already live in a same-named folder under local_folderpath.
                with archive.open(file_info) as source, open(filepath, "wb") as target:
                    shutil.copyfileobj(source, target)
                return filepath  # only one element from the archive!
        raise FileNotFoundError(f"No file matching the specified condition was found in '{zip_filepath}'")

    @staticmethod
    def read_text_from_zip(zip_filepath: Path, predicate: Callable[[str], bool], byte: bool = False, encoding: str = 'utf-8') -> "str | bytes":
        """
        Reads the content of the first file from a zip archive that matches a given condition.

        Args:
            zip_filepath (Path): The path to the zip file.
            predicate (Callable[[str], bool]): A function that takes a member name
                (e.g., 'folder/file.txt') as input and returns True if it's the desired file.
                Directory entries are never offered to it.
            byte (bool): If True, returns the raw file content as bytes. If False,
                returns it decoded as a string.
            encoding (str): The text encoding to use for decoding the file bytes.
                Ignored when ``byte`` is True. Defaults to 'utf-8'.

        Returns:
            str | bytes: The content of the found file, decoded to ``str`` unless
            ``byte`` is True.

        Raises:
            FileNotFoundError: If the zip file does not exist, or if no file
                matching the predicate is found in the archive.
            UnicodeDecodeError: If the file cannot be decoded with the specified encoding.
        """
        if not Path(zip_filepath).exists():
            raise FileNotFoundError(f"The specified zip file was not found: '{zip_filepath}'")
        with zipfile.ZipFile(zip_filepath, mode="r") as archive:
            for file_info in archive.infolist():
                if file_info.is_dir() or not predicate(file_info.filename):
                    continue
                # Read through the ZipInfo object rather than the name: when an
                # archive holds duplicate names, a lookup by name returns the last
                # entry instead of the one that was actually matched.
                file_bytes = archive.read(file_info)
                return file_bytes if byte else file_bytes.decode(encoding)
        raise FileNotFoundError(f"No file matching the specified condition was found in '{zip_filepath}'")
if __name__ == "__main__":
    # Demo: select the first PNG in example.zip, first extracting it to disk
    # and then loading it into memory.
    def is_png(member_name: str):
        """Return True for archive member names ending in '.png'."""
        return member_name.endswith('.png')

    archive_path = Path("example.zip")
    extracted_path = ZipUtils.extract_file_from_zip(archive_path, Path("data/"), is_png)
    image = ZipUtils.load_image_from_zip(archive_path, is_png)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment