Skip to content

Instantly share code, notes, and snippets.

@raphaeljolivet
Created April 1, 2026 08:10
Show Gist options
  • Select an option

  • Save raphaeljolivet/8430d2a84caa6da5c75deeecd927f9fc to your computer and use it in GitHub Desktop.

Select an option

Save raphaeljolivet/8430d2a84caa6da5c75deeecd927f9fc to your computer and use it in GitHub Desktop.
Adapter of numcodecs for Zarr3
from typing import Self, Type
from zarr.abc.codec import ArrayBytesCodec
from zarr.core import NDBuffer
from zarr.core.buffer.cpu import Buffer
from zarr.core.array_spec import ArraySpec
import numpy as np
from zarr.registry import register_codec
from numcodecs.abc import Codec
class AbstractNumcodecAdapter(ArrayBytesCodec):
    """
    Generic adapter class from zarr v2 codec to a zarr v3 serializer (not compressor).

    An instance builds one codec object from ``codec_class`` and stores it on
    the ``codec`` attribute; chunk encoding and decoding are delegated to it.
    """

    is_fixed_size = False

    def __init__(self, codec_class=None, *args, **kwargs) -> None:
        """
        Instantiate the wrapped codec.

        Parameters
        ----------
        codec_class
            Callable (normally a numcodecs ``Codec`` subclass) used to build
            the wrapped codec. Required despite the ``None`` default.
        *args, **kwargs
            Forwarded unchanged to ``codec_class``.

        Raises
        ------
        TypeError
            If ``codec_class`` is not provided.
        """
        super().__init__()
        if codec_class is None:
            # Same exception type as calling ``None`` would raise, but with a
            # message that points at the actual mistake.
            raise TypeError(
                f"{type(self).__name__} requires a 'codec_class'; "
                "use adapt_codec() to build a concrete adapter class"
            )
        codec = codec_class(*args, **kwargs)
        setattr(self, "codec", codec)

    @classmethod
    def from_dict(cls, data: dict) -> Self:
        """
        Build an adapter from a mapping shaped like the output of ``to_dict``.

        Only ``data["args"]`` is read; it is expanded as keyword arguments of
        the constructor. A missing ``"args"`` key raises ``KeyError``.
        """
        return cls(**data["args"])

    def to_dict(self) -> dict:
        """
        Serialize the adapter as ``{"name": <codec_id>, "args": {...}}``.

        ``args`` holds the instance attributes of the wrapped codec, with the
        entries whose value is ``None`` left out.
        """
        codec = getattr(self, "codec")
        args = {key: val for key, val in codec.__dict__.items() if val is not None}
        return dict(
            name=codec.__class__.codec_id,
            args=args)

    async def _decode_single(
        self,
        chunk_bytes: Buffer,
        chunk_spec: ArraySpec,
    ) -> NDBuffer:
        """Async entry point for decoding; runs ``_decode_sync`` inline."""
        return self._decode_sync(chunk_bytes, chunk_spec)

    async def _encode_single(
        self,
        chunk_array: NDBuffer,
        chunk_spec: ArraySpec,
    ) -> Buffer | None:
        """Async entry point for encoding; runs ``_encode_sync`` inline."""
        return self._encode_sync(chunk_array, chunk_spec)

    def _encode_sync(
        self,
        chunk_array: NDBuffer,
        chunk_spec: ArraySpec,
    ) -> Buffer | None:
        """
        Encode one chunk with the wrapped codec.

        The chunk is reshaped to ``chunk_spec.shape`` and handed to
        ``codec.encode``; the result is wrapped in a buffer obtained from
        ``chunk_spec.prototype``.
        """
        codec: Codec = getattr(self, "codec")
        # Reshape to original shape before encoding. The public accessor is
        # used rather than the private ``_data`` attribute.
        array = chunk_array.as_numpy_array().reshape(chunk_spec.shape)
        encoded = codec.encode(array)
        # Build the output through the spec's prototype, mirroring how
        # ``_decode_sync`` builds its NDBuffer.
        return chunk_spec.prototype.buffer.from_bytes(encoded)

    def _decode_sync(
        self,
        chunk_bytes: Buffer,
        chunk_spec: ArraySpec,
    ) -> NDBuffer:
        """
        Decode one chunk with the wrapped codec.

        An array of ``chunk_spec.shape`` and the native dtype of
        ``chunk_spec.dtype`` is allocated and passed to ``codec.decode`` as
        its output argument, then wrapped in an NDBuffer obtained from
        ``chunk_spec.prototype``.
        """
        codec: Codec = getattr(self, "codec")
        # Init output array
        out = np.empty(chunk_spec.shape, dtype=chunk_spec.dtype.to_native_dtype())
        # Decode. NOTE(review): the return value of ``decode`` is ignored, so
        # this relies on the codec writing its result into ``out``.
        codec.decode(chunk_bytes.to_bytes(), out)
        return chunk_spec.prototype.nd_buffer.from_ndarray_like(out)

    def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
        """Not supported: always raises ``NotImplementedError``."""
        raise NotImplementedError()
def adapt_codec(codec_class: Type[Codec]) -> Type[AbstractNumcodecAdapter]:
    """
    Transform a Zarr V2 / numcodecs codec into a Zarr V3 one, for use as 'serializer' (not compressor).

    This function also registers the new codec class under the ``codec_id``
    of ``codec_class``. It should be called both when saving and when loading
    Zarr data that uses this encoding.

    Parameters
    ----------
    codec_class
        The numcodecs codec class to wrap. Arguments given to the returned
        class, positional or keyword, are forwarded to it.

    Returns
    -------
    A new ``AbstractNumcodecAdapter`` subclass named ``<codec_class name>Adapter``.

    Example
    -------
    This example adapts the Jpeg2000 codec for Zarr3

    >>> from imagecodecs.numcodecs import Jpeg2k
    >>>
    >>> Jpeg2kZarr3 = adapt_codec(Jpeg2k)
    >>>
    >>> # ... Xarray dataset creation goes here ...
    >>>
    >>> dataset.to_zarr(
    ...     output_folder, mode="w",
    ...     zarr_format=3,
    ...     encoding={"var1": {
    ...         "serializer": Jpeg2kZarr3(),
    ...     }})
    """

    class DynamicClass(AbstractNumcodecAdapter):
        def __init__(self, *args, **kwargs) -> None:
            # codec_class goes first positionally. Passed as a keyword next to
            # *args, the first positional argument would also bind to the
            # ``codec_class`` parameter and raise "multiple values" TypeError.
            super().__init__(codec_class, *args, **kwargs)

    DynamicClass.__name__ = f"{codec_class.__name__}Adapter"

    # Register codec with the initial codec id
    register_codec(codec_class.codec_id, DynamicClass)
    return DynamicClass
@raphaeljolivet

Copy link
Copy Markdown
Author

Usage

from imagecodecs.numcodecs import Jpeg2k

Jpeg2kZarr3 = adapt_codec(Jpeg2k)

# ... Xarray dataset creation goes here ...

dataset.to_zarr(
    output_folder,  mode="w",
    zarr_format=3,
   encoding={"var1": {
    "serializer" : Jpeg2kZarr3(),
}})

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment