Skip to content

Instantly share code, notes, and snippets.

@scottyhq
Created November 6, 2023 22:48
Show Gist options
  • Save scottyhq/effa642f00112971e2350d921a0aed9d to your computer and use it in GitHub Desktop.
Save scottyhq/effa642f00112971e2350d921a0aed9d to your computer and use it in GitHub Desktop.
to_zarr troubleshooting from xarray
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "3db81af4-ebfd-4764-b729-81f1911ff1a6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#%pip install --upgrade s3fs xarray zarr watermark"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "21faefc4-fd62-499d-b4bb-b0738e33eeb6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%load_ext watermark"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d0dbcd74-a34e-4592-9989-8b18f2ed864f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import xarray as xr\n",
"import s3fs\n",
"import zarr\n",
"import fsspec"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e8428552-32ea-4b25-a900-a0c52b19f808",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"fsspec: 2023.10.0\n",
"xarray: 2023.10.1\n",
"zarr : 2.16.1\n",
"s3fs : 2023.10.0\n",
"\n"
]
}
],
"source": [
"%watermark -iv"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8fe8f8c5-9146-4cbb-a438-fe71290b250b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"import psutil\n",
"import logging"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "27a6b97f-31d8-42fb-8544-b2a219496d19",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# HTTP filesystem wrapped in fsspec's \"simplecache\" layer: files are cached\n",
"# under /tmp/files/, and same_names=True is requested so the cached copy keeps\n",
"# the remote file name (see the `ls /tmp/files/` listing further down).\n",
"fs = fsspec.filesystem(\n",
"    \"simplecache\",\n",
"    cache_storage='/tmp/files/',\n",
"    target_protocol='http',\n",
"    same_names=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "46e82261-3ab1-4eba-be96-265e83818501",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 479 ms, sys: 118 ms, total: 597 ms\n",
"Wall time: 1.41 s\n"
]
}
],
"source": [
"%%time\n",
"url = 'https://www.unidata.ucar.edu/software/netcdf/examples/ECMWF_ERA-40_subset.nc'\n",
"\n",
"# Open the example file through the caching filesystem `fs` and pull every\n",
"# variable into memory with .load() while the file handle is still open.\n",
"with fs.open(url) as f:\n",
"    ds = xr.open_dataset(f).load()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0ca98f7f-fa20-4248-860b-e7e2ea4cdc0d",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ECMWF_ERA-40_subset.nc\n"
]
}
],
"source": [
"!ls /tmp/files/"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c4ea146b-0db9-4dd0-a6db-8ada58a0ccf5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Mimic uncompressed unchunked 3D arrays within netCDF:\n",
"# no compressor, and chunks=-1 on every data variable.\n",
"zarr_encoding = {'compressor': None, 'chunks': -1}\n",
"for data_var in ds.data_vars:\n",
"    ds[data_var].encoding.update(zarr_encoding)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4b4b52ac-ab29-4fa2-ab72-804d930883b7",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 125 ms, sys: 44.1 ms, total: 169 ms\n",
"Wall time: 169 ms\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Baseline timing: write the in-memory dataset to a Zarr store on local disk,\n",
"# overwriting any store already at that path (mode='w').\n",
"lpath = '/tmp/ECMWF_ERA-40_subset.zarr'\n",
"zstore = ds.to_zarr(store=lpath, mode='w')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "34639f80-3f9b-45ba-9db7-5f80ee46de6e",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"│   └── .zattrs\n",
"├── tp\n",
"│   ├── 0.0.0\n",
"│   ├── .zarray\n",
"│   └── .zattrs\n",
"├── .zattrs\n",
"├── .zgroup\n",
"└── .zmetadata\n",
"\n",
"20 directories, 63 files\n"
]
}
],
"source": [
"!tree -a /tmp/ECMWF_ERA-40_subset.zarr | tail"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "fc96fcef-3e0a-457a-ac4f-ca38700614b6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"s3 = s3fs.S3FileSystem()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "2840f923-c5de-43c9-9f08-0f09ee122d47",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 405 ms, sys: 112 ms, total: 517 ms\n",
"Wall time: 996 ms\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# lpath is an absolute local path ('/tmp/...'). Interpolating it whole gave\n",
"# 's3://nasa-cryo-scratch/scottyhq//tmp/...': an empty path segment plus a\n",
"# stray 'tmp/' prefix. Only the store's directory name belongs in the key.\n",
"rpath = f's3://nasa-cryo-scratch/scottyhq/{os.path.basename(lpath)}'\n",
"\n",
"# Upload the locally written Zarr store as-is (every file under lpath).\n",
"result = s3.put(lpath, rpath, recursive=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "7fce8622-a6db-45ac-88fb-a5e05d7caf30",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Send s3fs DEBUG records (one line per S3 API call) to debug.log.\n",
"log = logging.getLogger('s3fs')\n",
"log.setLevel(logging.DEBUG)\n",
"\n",
"# Drop any handler a previous run of this cell attached for debug.log;\n",
"# otherwise each re-run adds one more handler and every record is duplicated.\n",
"log_file = os.path.abspath(\"debug.log\")\n",
"for old_handler in list(log.handlers):\n",
"    if getattr(old_handler, 'baseFilename', None) == log_file:\n",
"        log.removeHandler(old_handler)\n",
"        old_handler.close()\n",
"\n",
"# mode='w' truncates the file so debug.log only holds records from this run\n",
"# (the default append mode would leave older runs at the top of the file).\n",
"file_handle = logging.FileHandler(\"debug.log\", mode='w')\n",
"formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(levelname)s %(message)s',\n",
"                              datefmt='%H:%M:%S')\n",
"file_handle.setFormatter(formatter)\n",
"log.addHandler(file_handle)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4dbd1f72-72a8-45ea-a0ee-fae36f596378",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Key-value mapping over the S3 prefix that ds.to_zarr() writes into below.\n",
"store = s3fs.S3Map(\n",
"    root='s3://nasa-cryo-scratch/scottyhq/zarr_from_xarray.zarr',\n",
"    s3=s3,\n",
"    check=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "ffa0ccbf-26ff-4367-8754-f5d3c91d5b6c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.71 s, sys: 149 ms, total: 1.86 s\n",
"Wall time: 14.5 s\n"
]
},
{
"data": {
"text/plain": [
"<xarray.backends.zarr.ZarrStore at 0x7f766ab27f40>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"\n",
"# Write the same dataset directly to S3 through the mapper. Compare this wall\n",
"# time with the local to_zarr + s3.put timings recorded in the earlier cells.\n",
"ds.to_zarr(store=store, mode='w')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "3c3f9991-b310-4338-a937-c6fd47df21ca",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"22:47:12.167 DEBUG Get directory listing page for nasa-cryo-scratch/scottyhq/zarr_from_xarray.zarr\n",
"22:47:12.215 DEBUG Get directory listing page for nasa-cryo-scratch/scottyhq/zarr_from_xarray.zarr/.zgroup\n",
"22:47:12.229 DEBUG CALL: list_objects_v2 - () - {'MaxKeys': 1, 'Bucket': 'nasa-cryo-scratch'}\n",
"22:47:12.243 DEBUG CALL: put_object - () - {'Bucket': 'nasa-cryo-scratch', 'Key': 'scottyhq/zarr_from_xarray.zarr/.zgroup'}\n",
"22:47:12.272 DEBUG CALL: head_object - ({},) - {'Bucket': 'nasa-cryo-scratch', 'Key': 'scottyhq/zarr_from_xarray.zarr/.zarray'}\n",
"22:47:12.293 DEBUG Client error (maybe retryable): An error occurred (404) when calling the HeadObject operation: Not Found\n",
"22:47:12.293 DEBUG CALL: list_objects_v2 - ({},) - {'Bucket': 'nasa-cryo-scratch', 'Prefix': 'scottyhq/zarr_from_xarray.zarr/.zarray/', 'Delimiter': '/', 'MaxKeys': 1}\n",
"22:47:12.330 DEBUG CALL: get_object - () - {'Bucket': 'nasa-cryo-scratch', 'Key': 'scottyhq/zarr_from_xarray.zarr/.zgroup'}\n",
"22:47:12.352 DEBUG CALL: head_object - ({},) - {'Bucket': 'nasa-cryo-scratch', 'Key': 'scottyhq/zarr_from_xarray.zarr/longitude/.zarray'}\n",
"22:47:12.364 DEBUG Client error (maybe retryable): An error occurred (404) when calling the HeadObject operation: Not Found\n"
]
}
],
"source": [
"!head debug.log"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14517b92-10a9-4549-9cc9-8274de10cf51",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment