Created
April 1, 2026 08:10
-
-
Save raphaeljolivet/8430d2a84caa6da5c75deeecd927f9fc to your computer and use it in GitHub Desktop.
Adapter of numcodecs for Zarr3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import Self, Type | |
| from zarr.abc.codec import ArrayBytesCodec | |
| from zarr.core import NDBuffer | |
| from zarr.core.buffer.cpu import Buffer | |
| from zarr.core.array_spec import ArraySpec | |
| import numpy as np | |
| from zarr.registry import register_codec | |
| from numcodecs.abc import Codec | |
class AbstractNumcodecAdapter(ArrayBytesCodec):
    """
    Generic adapter class from zarr v2 codec to a zarr v3 serializer (not compressor).

    The wrapped numcodecs codec instance is stored on ``self.codec``; chunk
    encoding and decoding are delegated to it.
    """

    # Wrapped numcodecs codec instance, assigned in ``__init__``.
    codec: Codec

    is_fixed_size = False

    def __init__(self, codec_class=None, *args, **kwargs) -> None:
        """
        Instantiate ``codec_class`` with ``*args`` / ``**kwargs`` and wrap it.

        Raises
        ------
        TypeError
            If ``codec_class`` is not provided.
        """
        if codec_class is None:
            # Fail with an explicit message rather than "'NoneType' object is not callable".
            raise TypeError(
                "codec_class is required: pass the numcodecs codec class to adapt"
            )
        super().__init__()
        # NOTE(review): kept as setattr like the original; presumably this sidesteps
        # static read-only checks inherited from the zarr base class -- confirm.
        setattr(self, "codec", codec_class(*args, **kwargs))

    @classmethod
    def from_dict(cls, data: dict) -> Self:
        """
        Rebuild an adapter from the mapping produced by :meth:`to_dict`.

        The codec keyword arguments are read from ``data["args"]``. When that
        key is absent, ``data["configuration"]`` is used instead, and an empty
        mapping if neither is present.
        """
        args = data.get("args")
        if args is None:
            args = data.get("configuration") or {}
        return cls(**args)

    def to_dict(self) -> dict:
        """
        Serialize the adapter as ``{"name": <codec_id>, "args": <codec parameters>}``.

        Parameters come from the codec's own ``get_config()`` rather than its raw
        ``__dict__``, so each codec decides how its parameters are represented.
        The ``"id"`` entry and parameters whose value is ``None`` are dropped.
        """
        codec = self.codec
        args = {
            key: val
            for key, val in codec.get_config().items()
            if key != "id" and val is not None
        }
        return {"name": type(codec).codec_id, "args": args}

    async def _decode_single(
        self,
        chunk_bytes: Buffer,
        chunk_spec: ArraySpec,
    ) -> NDBuffer:
        """Async entry point; delegates to :meth:`_decode_sync`."""
        return self._decode_sync(chunk_bytes, chunk_spec)

    async def _encode_single(
        self,
        chunk_array: NDBuffer,
        chunk_spec: ArraySpec,
    ) -> Buffer | None:
        """Async entry point; delegates to :meth:`_encode_sync`."""
        return self._encode_sync(chunk_array, chunk_spec)

    def _encode_sync(
        self,
        chunk_array: NDBuffer,
        chunk_spec: ArraySpec,
    ) -> Buffer | None:
        """Encode one chunk with the wrapped codec and wrap the result in a ``Buffer``."""
        # Reshape to original shape before encoding
        array = chunk_array.as_ndarray_like().reshape(chunk_spec.shape)
        encoded = self.codec.encode(array)
        return Buffer.from_bytes(encoded)

    def _decode_sync(
        self,
        chunk_bytes: Buffer,
        chunk_spec: ArraySpec,
    ) -> NDBuffer:
        """Decode one chunk into a new array of the chunk's shape and native dtype."""
        # Init output array
        out = np.empty(chunk_spec.shape, dtype=chunk_spec.dtype.to_native_dtype())
        # Decode. The codec's return value is ignored: the codec is expected to
        # fill ``out`` in place.
        self.codec.decode(chunk_bytes.to_bytes(), out)
        return chunk_spec.prototype.nd_buffer.from_ndarray_like(out)

    def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
        """Not supported by this adapter; always raises ``NotImplementedError``."""
        raise NotImplementedError()
def adapt_codec(codec_class: Type[Codec]) -> Type[AbstractNumcodecAdapter]:
    """
    Transform a Zarr V2 / numcodecs codec into a Zarr V3 one, for use as 'serializer' (not compressor).

    This function also registers the adapter under the original ``codec_id``.
    It should be called both when saving and when loading Zarr data with this encoding.

    Parameters
    ----------
    codec_class
        The numcodecs codec class to adapt.

    Returns
    -------
    A new subclass of ``AbstractNumcodecAdapter`` named ``<CodecName>Adapter``.
    Its constructor forwards all positional and keyword arguments to ``codec_class``.

    Example
    -------
    This example adapts the Jpeg2000 codec for Zarr3

    >>> from imagecodecs.numcodecs import Jpeg2k
    >>>
    >>> Jpeg2kZarr3 = adapt_codec(Jpeg2k)
    >>>
    >>> # ... Xarray dataset creation goes here ...
    >>>
    >>> dataset.to_zarr(
    ...     output_folder, mode="w",
    ...     zarr_format=3,
    ...     encoding={"var1": {
    ...         "serializer": Jpeg2kZarr3(),
    ...     }})
    """

    class DynamicClass(AbstractNumcodecAdapter):
        def __init__(self, *args, **kwargs) -> None:
            # codec_class must be passed positionally: passing it by keyword
            # together with *args makes the first positional codec argument
            # collide with it ("got multiple values for argument").
            super().__init__(codec_class, *args, **kwargs)

    adapter_name = f"{codec_class.__name__}Adapter"
    DynamicClass.__name__ = adapter_name
    # Keep the qualified name in sync so the class repr shows the adapter name.
    DynamicClass.__qualname__ = adapter_name

    # Register codec with the initial codec id
    register_codec(codec_class.codec_id, DynamicClass)
    return DynamicClass
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage