Created
November 6, 2023 22:48
-
-
Save scottyhq/effa642f00112971e2350d921a0aed9d to your computer and use it in GitHub Desktop.
to_zarr troubleshooting from xarray
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "3db81af4-ebfd-4764-b729-81f1911ff1a6", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"#%pip install --upgrade s3fs xarray zarr watermark" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "21faefc4-fd62-499d-b4bb-b0738e33eeb6", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"%load_ext watermark" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "d0dbcd74-a34e-4592-9989-8b18f2ed864f", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"import xarray as xr\n", | |
"import s3fs\n", | |
"import zarr\n", | |
"import fsspec" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "e8428552-32ea-4b25-a900-a0c52b19f808", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"fsspec: 2023.10.0\n", | |
"xarray: 2023.10.1\n", | |
"zarr : 2.16.1\n", | |
"s3fs : 2023.10.0\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"%watermark -iv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "8fe8f8c5-9146-4cbb-a438-fe71290b250b", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import psutil\n", | |
"import logging" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "27a6b97f-31d8-42fb-8544-b2a219496d19", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"fs = fsspec.filesystem(\"simplecache\", \n", | |
" cache_storage='/tmp/files/',\n", | |
" target_protocol='http',\n", | |
" same_names=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "46e82261-3ab1-4eba-be96-265e83818501", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 479 ms, sys: 118 ms, total: 597 ms\n", | |
"Wall time: 1.41 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"url = 'https://www.unidata.ucar.edu/software/netcdf/examples/ECMWF_ERA-40_subset.nc'\n", | |
"with fs.open(url) as f:\n", | |
" ds = xr.open_dataset(f).load()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "0ca98f7f-fa20-4248-860b-e7e2ea4cdc0d", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"ECMWF_ERA-40_subset.nc\n" | |
] | |
} | |
], | |
"source": [ | |
"!ls /tmp/files/" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "c4ea146b-0db9-4dd0-a6db-8ada58a0ccf5", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Mimic uncompressed, unchunked 3D arrays within netCDF: give every data\n", | |
"# variable the same encoding overrides (no compressor, chunks=-1).\n", | |
"zarr_encoding = {'compressor': None, 'chunks': -1}\n", | |
"for var_name in ds.data_vars:\n", | |
"    ds[var_name].encoding.update(zarr_encoding)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "4b4b52ac-ab29-4fa2-ab72-804d930883b7", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 125 ms, sys: 44.1 ms, total: 169 ms\n", | |
"Wall time: 169 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time \n", | |
"\n", | |
"lpath = '/tmp/ECMWF_ERA-40_subset.zarr'\n", | |
"\n", | |
"zstore = ds.to_zarr(lpath, mode='w')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "34639f80-3f9b-45ba-9db7-5f80ee46de6e", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"│ └── .zattrs\n", | |
"├── tp\n", | |
"│ ├── 0.0.0\n", | |
"│ ├── .zarray\n", | |
"│ └── .zattrs\n", | |
"├── .zattrs\n", | |
"├── .zgroup\n", | |
"└── .zmetadata\n", | |
"\n", | |
"20 directories, 63 files\n" | |
] | |
} | |
], | |
"source": [ | |
"!tree -a /tmp/ECMWF_ERA-40_subset.zarr | tail" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "fc96fcef-3e0a-457a-ac4f-ca38700614b6", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"s3 = s3fs.S3FileSystem()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "2840f923-c5de-43c9-9f08-0f09ee122d47", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 405 ms, sys: 112 ms, total: 517 ms\n", | |
"Wall time: 996 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"\n", | |
"# Upload the locally written Zarr store to S3 in one recursive copy.\n", | |
"# Only the store's directory name goes into the remote key: lpath is an\n", | |
"# absolute path, so interpolating it whole would yield\n", | |
"# 's3://nasa-cryo-scratch/scottyhq//tmp/ECMWF_ERA-40_subset.zarr'.\n", | |
"rpath = f's3://nasa-cryo-scratch/scottyhq/{os.path.basename(lpath)}'\n", | |
"\n", | |
"result = s3.put(lpath, rpath, recursive=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "7fce8622-a6db-45ac-88fb-a5e05d7caf30", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Send s3fs DEBUG messages to debug.log with millisecond timestamps.\n", | |
"log = logging.getLogger('s3fs')\n", | |
"log.setLevel(logging.DEBUG)\n", | |
"\n", | |
"# Re-running this cell must not stack a second FileHandler on the logger,\n", | |
"# otherwise every message is written to debug.log more than once.\n", | |
"for stale_handler in list(log.handlers):\n", | |
"    if isinstance(stale_handler, logging.FileHandler):\n", | |
"        log.removeHandler(stale_handler)\n", | |
"        stale_handler.close()\n", | |
"\n", | |
"formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(levelname)s %(message)s',\n", | |
"                              datefmt='%H:%M:%S')\n", | |
"file_handle = logging.FileHandler(\"debug.log\")\n", | |
"# Configure the handler fully before attaching it to the logger.\n", | |
"file_handle.setFormatter(formatter)\n", | |
"log.addHandler(file_handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "4dbd1f72-72a8-45ea-a0ee-fae36f596378", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"store = s3fs.S3Map(root='s3://nasa-cryo-scratch/scottyhq/zarr_from_xarray.zarr', \n", | |
" s3=s3, \n", | |
" check=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "ffa0ccbf-26ff-4367-8754-f5d3c91d5b6c", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 1.71 s, sys: 149 ms, total: 1.86 s\n", | |
"Wall time: 14.5 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.backends.zarr.ZarrStore at 0x7f766ab27f40>" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"\n", | |
"ds.to_zarr(store=store, mode='w')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "3c3f9991-b310-4338-a937-c6fd47df21ca", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"22:47:12.167 DEBUG Get directory listing page for nasa-cryo-scratch/scottyhq/zarr_from_xarray.zarr\n", | |
"22:47:12.215 DEBUG Get directory listing page for nasa-cryo-scratch/scottyhq/zarr_from_xarray.zarr/.zgroup\n", | |
"22:47:12.229 DEBUG CALL: list_objects_v2 - () - {'MaxKeys': 1, 'Bucket': 'nasa-cryo-scratch'}\n", | |
"22:47:12.243 DEBUG CALL: put_object - () - {'Bucket': 'nasa-cryo-scratch', 'Key': 'scottyhq/zarr_from_xarray.zarr/.zgroup'}\n", | |
"22:47:12.272 DEBUG CALL: head_object - ({},) - {'Bucket': 'nasa-cryo-scratch', 'Key': 'scottyhq/zarr_from_xarray.zarr/.zarray'}\n", | |
"22:47:12.293 DEBUG Client error (maybe retryable): An error occurred (404) when calling the HeadObject operation: Not Found\n", | |
"22:47:12.293 DEBUG CALL: list_objects_v2 - ({},) - {'Bucket': 'nasa-cryo-scratch', 'Prefix': 'scottyhq/zarr_from_xarray.zarr/.zarray/', 'Delimiter': '/', 'MaxKeys': 1}\n", | |
"22:47:12.330 DEBUG CALL: get_object - () - {'Bucket': 'nasa-cryo-scratch', 'Key': 'scottyhq/zarr_from_xarray.zarr/.zgroup'}\n", | |
"22:47:12.352 DEBUG CALL: head_object - ({},) - {'Bucket': 'nasa-cryo-scratch', 'Key': 'scottyhq/zarr_from_xarray.zarr/longitude/.zarray'}\n", | |
"22:47:12.364 DEBUG Client error (maybe retryable): An error occurred (404) when calling the HeadObject operation: Not Found\n" | |
] | |
} | |
], | |
"source": [ | |
"!head debug.log" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "14517b92-10a9-4549-9cc9-8274de10cf51", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment