Created
November 7, 2023 21:09
-
-
Save d-v-b/f460c7f673819d431cc958a04acbab8a to your computer and use it in GitHub Desktop.
crappy ome-ngff to xarray
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from xarray_ome_ngff.registry import get_adapters | |
import zarr | |
from typing import Union | |
import dask.array as da | |
from xarray import DataArray | |
import os | |
def infer_coords(group: zarr.Group, array: zarr.Array): | |
# these conditionals should be handled by a lower-level validation function | |
if 'multiscales' in group.attrs: | |
multiscales = group.attrs['multiscales'] | |
if len(multiscales) > 0: | |
# note that technically the spec allows multiple references to the same zarr array | |
# because multiscales is a list | |
multiscale = multiscales[0] | |
ngff_version = multiscale.get("version", None) | |
# get the appropriate Multiscale model depending on the version | |
if ngff_version == "0.4": | |
from pydantic_ome_ngff.v04 import Multiscale | |
elif ngff_version == "0.5-dev": | |
from pydantic_ome_ngff.latest import Multiscale | |
else: | |
raise ValueError( | |
"Could not resolve the version of the multiscales metadata ", | |
f"found in the group metadata {dict(group.attrs)}", | |
) | |
else: | |
raise ValueError("Multiscales attribute was empty.") | |
else: | |
raise ValueError("Multiscales attribute not found.") | |
xarray_adapters = get_adapters(ngff_version) | |
multiscales_meta = [Multiscale(**entry) for entry in multiscales] | |
transforms = [] | |
axes = [] | |
matched_multiscale, matched_dataset = None, None | |
# find the correct element in multiscales.datasets for this array | |
for multi in multiscales_meta: | |
for dataset in multi.datasets: | |
if dataset.path == array.basename: | |
matched_multiscale = multi | |
matched_dataset = dataset | |
if matched_dataset is None or matched_multiscale is None: | |
raise ValueError( | |
f""" | |
Could not find an entry referencing array {array.basename} | |
in the `multiscales` metadata of the parent group. | |
""" | |
) | |
else: | |
if matched_multiscale.coordinateTransformations is not None: | |
transforms.extend(matched_multiscale.coordinateTransformations) | |
transforms.extend(matched_dataset.coordinateTransformations) | |
axes.extend(matched_multiscale.axes) | |
coords = xarray_adapters.transforms_to_coords(axes, transforms, array.shape) | |
return coords | |
def read_dataarray(group: zarr.Group, array: zarr.Array, use_dask: bool = True, **kwargs) -> DataArray: | |
coords = infer_coords(group, array) | |
if use_dask: | |
data = da.from_array(array, **kwargs) | |
else: | |
data = array | |
return DataArray(data, coords) | |
def test_read_dataarray(): | |
path = "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr/" | |
z_group = zarr.open(path, mode='r') | |
z_array = zarr.open(store=z_group.store, path = '0') | |
d_array = read_dataarray(z_group, z_array) | |
print(d_array) | |
if __name__ == '__main__': | |
test_read_dataarray() | |
""" | |
<xarray.DataArray 'array-0a22e19c51aed195a0b364219bd996aa' (c: 2, z: 236, | |
y: 275, x: 271)> | |
dask.array<array, shape=(2, 236, 275, 271), dtype=uint16, chunksize=(2, 236, 275, 271), chunktype=numpy.ndarray> | |
Coordinates: | |
* c (c) float64 0.0 1.0 | |
* z (z) float64 0.0 0.5002 1.0 1.501 2.001 ... 116.0 116.5 117.0 117.5 | |
* y (y) float64 0.0 0.3604 0.7208 1.081 ... 97.67 98.03 98.39 98.75 | |
* x (x) float64 0.0 0.3604 0.7208 1.081 ... 96.23 96.59 96.95 97.31 | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment