# Gist by @prl900, created August 30, 2021
import itertools
import os

import datacube
import numpy as np
from datacube.drivers.netcdf import write_dataset_to_netcdf

# split_by_chunks adapted from:
# https://ncar.github.io/xdev/posts/writing-multiple-netcdf-files-in-parallel-with-xarray-and-dask/
def split_by_chunks(dataset):
    """Yield one sub-dataset per dask chunk of `dataset`."""
    chunk_slices = {}
    for dim, chunks in dataset.chunks.items():
        # Convert each dimension's chunk lengths into index slices.
        slices = []
        start = 0
        for chunk in chunks:
            if start >= dataset.sizes[dim]:
                break
            stop = start + chunk
            slices.append(slice(start, stop))
            start = stop
        chunk_slices[dim] = slices
    # The Cartesian product of the per-dimension slices enumerates every chunk.
    for slices in itertools.product(*chunk_slices.values()):
        selection = dict(zip(chunk_slices.keys(), slices))
        yield dataset[selection]
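
# Minimal sanity check of split_by_chunks on a synthetic dataset (an
# illustrative sketch, not part of the original gist): a 10x10 array
# chunked 5x5 should yield four 5x5 sub-datasets.
import xarray as xr
_demo = xr.Dataset({"v": (("y", "x"), np.zeros((10, 10)))}).chunk({"y": 5, "x": 5})
assert len(list(split_by_chunks(_demo))) == 4
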
dc = datacube.Datacube(app="Parallel_processing_with_Dask")
ds = dc.load(product="s2a_ard_granule",
             measurements=["nbart_red", "nbart_green", "nbart_blue",
                           "nbart_nir_1", "nbart_swir_2", "nbart_swir_3"],
             crs="EPSG:3577",
             x=(1500000.0, 1580000.0),
             y=(-3930000.0, -4030000.0),
             time=("2020-01-01", "2021-01-01"),
             output_crs="EPSG:3577",
             resolution=(-10, 10),
             group_by="solar_day",
             dask_chunks={"time": 100, "x": 400, "y": 400})
print(ds)
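
# With dask_chunks set, dc.load is lazy: each measurement is a dask array
# and no pixels are read until each chunk is written out below.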
for chunk in split_by_chunks(ds):
    # Derive a tile index from the chunk origin: 400-pixel chunks at 10 m
    # resolution are 4000 m across (y counts downward in EPSG:3577).
    x = int((chunk.x.values[0] - ds.x.values[0]) / 4000)
    y = int((ds.y.values[0] - chunk.y.values[0]) / 4000)
    print(x, y)
    fname = f"/g/data/ub8/au/blobs/s2a_{x:02d}_{y:02d}_2020.nc"
    # Skip tiles that have already been written, so the job can be resumed.
    if os.path.isfile(fname):
        continue
    write_dataset_to_netcdf(chunk, fname)
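
# Alternative (a hedged sketch, not the gist author's code): the NCAR post
# linked above writes every chunk in a single parallel call with
# xarray.save_mfdataset instead of looping. Note that write_dataset_to_netcdf
# also records datacube CRS metadata that plain save_mfdataset would not:
#
#     chunks = list(split_by_chunks(ds))
#     paths = [f"/g/data/ub8/au/blobs/s2a_"
#              f"{int((c.x.values[0] - ds.x.values[0]) / 4000):02d}_"
#              f"{int((ds.y.values[0] - c.y.values[0]) / 4000):02d}_2020.nc"
#              for c in chunks]
#     xr.save_mfdataset(chunks, paths)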