Skip to content

Instantly share code, notes, and snippets.

@ljstrnadiii
Created November 25, 2025 20:22
Show Gist options
  • Select an option

  • Save ljstrnadiii/8a7e230563b3b1a54bf02307b4033d1b to your computer and use it in GitHub Desktop.

Select an option

Save ljstrnadiii/8a7e230563b3b1a54bf02307b4033d1b to your computer and use it in GitHub Desktop.
Zarr Chunk Index
from typing import Generator
import zarr
import numpy as np
from affine import Affine
from pyproj import CRS
from rasterio.windows import Window, bounds
from shapely.geometry import box
import geopandas as gpd
def compute_chunk_polys(array: zarr.Array) -> Generator[tuple[int, int, int, int], None, None]:
    """Yield the pixel extent of every chunk on the array's y/x chunk grid.

    Despite the name, this yields plain pixel extents rather than polygon
    objects; turning them into geometries is left to the caller.

    Args:
        array: Array whose ``metadata.dimension_names`` contains ``"y"`` and
            ``"x"``. Only ``metadata.dimension_names``, ``chunks`` and
            ``shape`` are read.

    Yields:
        ``(x0, y0, x1, y1)`` pixel offsets, one tuple per chunk. ``x0``/``y0``
        are the chunk's start offsets and ``x1``/``y1`` its end offsets,
        clipped to the array size so that edge chunks are not over-extended.
        Chunks are produced row by row (y outer, x inner).

    Raises:
        ValueError: If the array has no dimension names, or if ``"y"`` or
            ``"x"`` is not among them.
    """
    dims = array.metadata.dimension_names
    if dims is None:
        # Without names there is no way to tell which axes are spatial; fail
        # with the same exception type ``.index`` uses for a missing name.
        raise ValueError(
            "array has no dimension names; cannot locate its 'y' and 'x' axes"
        )

    y_axis = dims.index("y")
    x_axis = dims.index("x")
    chunk_h, chunk_w = array.chunks[y_axis], array.chunks[x_axis]
    height, width = array.shape[y_axis], array.shape[x_axis]

    # Step through chunk origins with integer arithmetic only: a float-based
    # ceil(size / chunk) can round down for very large extents and silently
    # drop the final partial chunk.
    for y0_px in range(0, height, chunk_h):
        y1_px = min(y0_px + chunk_h, height)
        for x0_px in range(0, width, chunk_w):
            x1_px = min(x0_px + chunk_w, width)
            yield (x0_px, y0_px, x1_px, y1_px)
# Open the consolidated Zarr store and pick out the data array to index.
store = zarr.open_consolidated(
    "s3://union-sandbox-unionai-wherobots/wc/axwdzcpjzfwbglw6sv5h-n1-0-dn0-0/axwdzcpjzfwbglw6sv5h/59f3/cf2867b41771.zarr"
)
array = store["variables"]

# current "standard" for accessing spatial reference info:
# the CRS (as WKT) and the GDAL-style GeoTransform string are read from the
# attributes of the "spatial_ref" member of the store.
spatial_ref = store["spatial_ref"]
attrs = spatial_ref.metadata.attributes
crs = CRS.from_wkt(attrs["crs_wkt"])
transform = Affine.from_gdal(
    *(float(coefficient) for coefficient in attrs["GeoTransform"].split())
)

# spatial index of chunks: one rectangle per chunk, in the array's CRS.
gdf = gpd.GeoDataFrame(
    geometry=[
        box(
            *bounds(
                # Window.from_slices takes the row (y) range first, then the
                # column (x) range.
                Window.from_slices((y0, y1), (x0, x1)),
                transform,
            )
        )
        for x0, y0, x1, y1 in compute_chunk_polys(array)
    ],
    crs=crs,
)

# Drop repeated footprints and show the rest on an interactive map.
gdf.drop_duplicates(subset='geometry').explore()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment