Skip to content

Instantly share code, notes, and snippets.

@zcobell
Created December 6, 2023 15:56
Show Gist options
  • Save zcobell/51fd8533894b70dddaa3da9a0fc6413e to your computer and use it in GitHub Desktop.
Save zcobell/51fd8533894b70dddaa3da9a0fc6413e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "f62bbb18-25d5-4ad8-a382-15dffca3f355",
"metadata": {},
"source": [
"# Convert two zarr'd simulations to a kerchunk dataset with parquet reference\n",
"\n",
"This code attempts to generate sidecar files for each simulation and then write the data to a parquet reference set"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "63764ef2-8e74-4697-8993-e61fa9c70e23",
"metadata": {},
"outputs": [],
"source": [
"from kerchunk.zarr import ZarrToZarr\n",
"from kerchunk.combine import MultiZarrToZarr\n",
"from kerchunk.df import refs_to_dataframe\n",
"from fsspec.implementations.reference import LazyReferenceMapper\n",
"import fsspec\n",
"import ujson\n",
"import logging\n",
"import xarray as xr\n",
"import os\n",
"\n",
"LOG_FORMAT = \"%(asctime)s :: %(levelname)s :: %(filename)s :: %(funcName)s :: %(message)s\"\n",
"LOG_DATE_FORMAT = \"%Y-%m-%dT%H:%M:%S%Z\"\n",
"\n",
"# Configure the root logger once, at INFO level, with the layout defined above.\n",
"logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT)"
]
},
{
"cell_type": "markdown",
"id": "e63927ae-2dd2-4f62-bc4e-6a06cb15a8c2",
"metadata": {},
"source": [
"Set the filenames for the simulations. Note that the simulations are identical and just duplicated here to illustrate that we can properly kerchunk this type of data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ea1abeb1-fbee-4359-b04e-ef4c41bdc302",
"metadata": {},
"outputs": [],
"source": [
"# Input zarr stores, one per simulation.\n",
"zarr_filename_00, zarr_filename_01 = \"result_2.zarr\", \"result_3.zarr\"\n",
"files = [zarr_filename_00, zarr_filename_01]\n",
"\n",
"# Accumulators filled by the sidecar-generation cell below.\n",
"json_files, zarr_refs = [], []"
]
},
{
"cell_type": "markdown",
"id": "7e720565-57ab-4edc-ac1c-26937dd4d88d",
"metadata": {},
"source": [
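"Generate a sidecar file for each simulation in the list. It's unclear to me why I need to include the `reject_bytes=False` key; most likely it is because `ujson` refuses to serialize `bytes` values by default and the translated references contain some."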
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b749f513-8bb7-445e-a998-b2c0dfc9f690",
"metadata": {},
"outputs": [],
"source": [
"fs = fsspec.filesystem(\"\")\n",
"\n",
"# Start from empty lists so that re-running this cell rebuilds the results\n",
"# instead of appending duplicate entries to lists created in an earlier cell.\n",
"zarr_refs = []\n",
"json_files = []\n",
"for f in files:\n",
"    # Build the kerchunk reference set for this zarr store.\n",
"    refs = ZarrToZarr(f).translate()\n",
"    sidecar = f + \".json\"\n",
"    # ujson raises on bytes values by default; reject_bytes=False serialises them\n",
"    # as strings (presumably the translated references contain bytes -- confirm).\n",
"    with fs.open(sidecar, \"wb\") as ff:\n",
"        ff.write(ujson.dumps(refs, reject_bytes=False).encode())\n",
"    zarr_refs.append(refs)\n",
"    json_files.append(sidecar)"
]
},
{
"cell_type": "markdown",
"id": "f56b6702-6d29-4b5a-87ba-72660e3367a9",
"metadata": {},
"source": [
"First, let's write to a regular JSON sidecar to prove things are working."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "73ab74de-3016-4035-a2b0-1ef0eb7fa7ef",
"metadata": {},
"outputs": [],
"source": [
"# One coordinate value per sidecar file along the new concatenation dimension.\n",
"new_dims = [1, 2]\n",
"mzz_json = MultiZarrToZarr(\n",
"    json_files,\n",
"    concat_dims=[\"append_dim\"],\n",
"    coo_map={\"append_dim\": new_dims},\n",
"    identical_dims=[\"time\", \"node\", \"nele\", \"latitude\", \"longitude\"],\n",
")\n",
"ref = mzz_json.translate()\n",
"\n",
"# Serialise the combined reference set and write it out as a single JSON sidecar.\n",
"with fs.open(\"kerchunked.json\", \"wb\") as ff:\n",
"    ff.write(ujson.dumps(ref).encode())"
]
},
{
"cell_type": "markdown",
"id": "5fc4e5ab-a7db-446e-87eb-49ef6592e05c",
"metadata": {},
"source": [
"Let's open the data in zarr/xarray and see what's what.\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f442dd4e-13ed-4463-8ec5-aed27063d041",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-12-06T10:54:16EST :: INFO :: reference.py :: __init__ :: Read reference from URL kerchunked.json\n"
]
}
],
"source": [
"# Options for the zarr backend: \"fo\" names the reference file that the\n",
"# \"reference://\" filesystem should read.\n",
"backend_args = dict(\n",
"    consolidated=False,\n",
"    storage_options=dict(fo=\"kerchunked.json\"),\n",
")\n",
"ds_json = xr.open_dataset(\n",
"    \"reference://\",\n",
"    engine=\"zarr\",\n",
"    backend_kwargs=backend_args,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b691ee24-8a8f-4c6f-819e-43677a59f4ee",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n",
"<defs>\n",
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n",
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"</symbol>\n",
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n",
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"</symbol>\n",
"</defs>\n",
"</svg>\n",
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n",
" *\n",
" */\n",
"\n",
":root {\n",
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n",
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n",
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n",
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n",
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n",
" --xr-background-color: var(--jp-layout-color0, white);\n",
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n",
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n",
"}\n",
"\n",
"html[theme=dark],\n",
"body[data-theme=dark],\n",
"body.vscode-dark {\n",
" --xr-font-color0: rgba(255, 255, 255, 1);\n",
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n",
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n",
" --xr-border-color: #1F1F1F;\n",
" --xr-disabled-color: #515151;\n",
" --xr-background-color: #111111;\n",
" --xr-background-color-row-even: #111111;\n",
" --xr-background-color-row-odd: #313131;\n",
"}\n",
"\n",
".xr-wrap {\n",
" display: block !important;\n",
" min-width: 300px;\n",
" max-width: 700px;\n",
"}\n",
"\n",
".xr-text-repr-fallback {\n",
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n",
" display: none;\n",
"}\n",
"\n",
".xr-header {\n",
" padding-top: 6px;\n",
" padding-bottom: 6px;\n",
" margin-bottom: 4px;\n",
" border-bottom: solid 1px var(--xr-border-color);\n",
"}\n",
"\n",
".xr-header > div,\n",
".xr-header > ul {\n",
" display: inline;\n",
" margin-top: 0;\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-obj-type,\n",
".xr-array-name {\n",
" margin-left: 2px;\n",
" margin-right: 10px;\n",
"}\n",
"\n",
".xr-obj-type {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-sections {\n",
" padding-left: 0 !important;\n",
" display: grid;\n",
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n",
"}\n",
"\n",
".xr-section-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-section-item input {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-item input + label {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label {\n",
" cursor: pointer;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label:hover {\n",
" color: var(--xr-font-color0);\n",
"}\n",
"\n",
".xr-section-summary {\n",
" grid-column: 1;\n",
" color: var(--xr-font-color2);\n",
" font-weight: 500;\n",
"}\n",
"\n",
".xr-section-summary > span {\n",
" display: inline-block;\n",
" padding-left: 0.5em;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-summary-in + label:before {\n",
" display: inline-block;\n",
" content: '►';\n",
" font-size: 11px;\n",
" width: 15px;\n",
" text-align: center;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label:before {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label:before {\n",
" content: '▼';\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label > span {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-summary,\n",
".xr-section-inline-details {\n",
" padding-top: 4px;\n",
" padding-bottom: 4px;\n",
"}\n",
"\n",
".xr-section-inline-details {\n",
" grid-column: 2 / -1;\n",
"}\n",
"\n",
".xr-section-details {\n",
" display: none;\n",
" grid-column: 1 / -1;\n",
" margin-bottom: 5px;\n",
"}\n",
"\n",
".xr-section-summary-in:checked ~ .xr-section-details {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-array-wrap {\n",
" grid-column: 1 / -1;\n",
" display: grid;\n",
" grid-template-columns: 20px auto;\n",
"}\n",
"\n",
".xr-array-wrap > label {\n",
" grid-column: 1;\n",
" vertical-align: top;\n",
"}\n",
"\n",
".xr-preview {\n",
" color: var(--xr-font-color3);\n",
"}\n",
"\n",
".xr-array-preview,\n",
".xr-array-data {\n",
" padding: 0 5px !important;\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-array-data,\n",
".xr-array-in:checked ~ .xr-array-preview {\n",
" display: none;\n",
"}\n",
"\n",
".xr-array-in:checked ~ .xr-array-data,\n",
".xr-array-preview {\n",
" display: inline-block;\n",
"}\n",
"\n",
".xr-dim-list {\n",
" display: inline-block !important;\n",
" list-style: none;\n",
" padding: 0 !important;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list li {\n",
" display: inline-block;\n",
" padding: 0;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list:before {\n",
" content: '(';\n",
"}\n",
"\n",
".xr-dim-list:after {\n",
" content: ')';\n",
"}\n",
"\n",
".xr-dim-list li:not(:last-child):after {\n",
" content: ',';\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-has-index {\n",
" font-weight: bold;\n",
"}\n",
"\n",
".xr-var-list,\n",
".xr-var-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-var-item > div,\n",
".xr-var-item label,\n",
".xr-var-item > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-even);\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-var-item > .xr-var-name:hover span {\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-var-list > li:nth-child(odd) > div,\n",
".xr-var-list > li:nth-child(odd) > label,\n",
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-odd);\n",
"}\n",
"\n",
".xr-var-name {\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-var-dims {\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-var-dtype {\n",
" grid-column: 3;\n",
" text-align: right;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-preview {\n",
" grid-column: 4;\n",
"}\n",
"\n",
".xr-index-preview {\n",
" grid-column: 2 / 5;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-name,\n",
".xr-var-dims,\n",
".xr-var-dtype,\n",
".xr-preview,\n",
".xr-attrs dt {\n",
" white-space: nowrap;\n",
" overflow: hidden;\n",
" text-overflow: ellipsis;\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-var-name:hover,\n",
".xr-var-dims:hover,\n",
".xr-var-dtype:hover,\n",
".xr-attrs dt:hover {\n",
" overflow: visible;\n",
" width: auto;\n",
" z-index: 1;\n",
"}\n",
"\n",
".xr-var-attrs,\n",
".xr-var-data,\n",
".xr-index-data {\n",
" display: none;\n",
" background-color: var(--xr-background-color) !important;\n",
" padding-bottom: 5px !important;\n",
"}\n",
"\n",
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n",
".xr-var-data-in:checked ~ .xr-var-data,\n",
".xr-index-data-in:checked ~ .xr-index-data {\n",
" display: block;\n",
"}\n",
"\n",
".xr-var-data > table {\n",
" float: right;\n",
"}\n",
"\n",
".xr-var-name span,\n",
".xr-var-data,\n",
".xr-index-name div,\n",
".xr-index-data,\n",
".xr-attrs {\n",
" padding-left: 25px !important;\n",
"}\n",
"\n",
".xr-attrs,\n",
".xr-var-attrs,\n",
".xr-var-data,\n",
".xr-index-data {\n",
" grid-column: 1 / -1;\n",
"}\n",
"\n",
"dl.xr-attrs {\n",
" padding: 0;\n",
" margin: 0;\n",
" display: grid;\n",
" grid-template-columns: 125px auto;\n",
"}\n",
"\n",
".xr-attrs dt,\n",
".xr-attrs dd {\n",
" padding: 0;\n",
" margin: 0;\n",
" float: left;\n",
" padding-right: 10px;\n",
" width: auto;\n",
"}\n",
"\n",
".xr-attrs dt {\n",
" font-weight: normal;\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-attrs dt:hover span {\n",
" display: inline-block;\n",
" background: var(--xr-background-color);\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-attrs dd {\n",
" grid-column: 2;\n",
" white-space: pre-wrap;\n",
" word-break: break-all;\n",
"}\n",
"\n",
".xr-icon-database,\n",
".xr-icon-file-text2,\n",
".xr-no-icon {\n",
" display: inline-block;\n",
" vertical-align: middle;\n",
" width: 1em;\n",
" height: 1.5em !important;\n",
" stroke-width: 0;\n",
" stroke: currentColor;\n",
" fill: currentColor;\n",
"}\n",
"</style><pre class='xr-text-repr-fallback'>&lt;xarray.Dataset&gt;\n",
"Dimensions: (append_dim: 2, time: 252, node: 8303,\n",
" nele: 14761, nvertex: 3)\n",
"Coordinates:\n",
" * append_dim (append_dim) int64 1 2\n",
" latitude (node) float64 ...\n",
" longitude (node) float64 ...\n",
" * time (time) datetime64[ns] 2020-04-28T01:20:00...\n",
"Dimensions without coordinates: node, nele, nvertex\n",
"Data variables:\n",
" air_pressure_at_sea_level (append_dim, time, node) float64 ...\n",
" eastward_water_velocity (append_dim, time, node) float64 ...\n",
" eastward_wind (append_dim, time, node) float64 ...\n",
" element (append_dim, nele, nvertex) int32 ...\n",
" northward_water_velocity (append_dim, time, node) float64 ...\n",
" northward_wind (append_dim, time, node) float64 ...\n",
" sea_surface_height_above_geoid (append_dim, time, node) float64 ...</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.Dataset</div></div><ul class='xr-sections'><li class='xr-section-item'><input id='section-24ffb6dc-26f8-4033-bf70-de9d6651137e' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-24ffb6dc-26f8-4033-bf70-de9d6651137e' class='xr-section-summary' title='Expand/collapse section'>Dimensions:</label><div class='xr-section-inline-details'><ul class='xr-dim-list'><li><span class='xr-has-index'>append_dim</span>: 2</li><li><span class='xr-has-index'>time</span>: 252</li><li><span>node</span>: 8303</li><li><span>nele</span>: 14761</li><li><span>nvertex</span>: 3</li></ul></div><div class='xr-section-details'></div></li><li class='xr-section-item'><input id='section-5ea71d2a-52d2-4bef-9bed-cadc76381204' class='xr-section-summary-in' type='checkbox' checked><label for='section-5ea71d2a-52d2-4bef-9bed-cadc76381204' class='xr-section-summary' >Coordinates: <span>(4)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>append_dim</span></div><div class='xr-var-dims'>(append_dim)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>1 2</div><input id='attrs-b743d770-768c-43cc-9d6b-0a3d9bdef270' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-b743d770-768c-43cc-9d6b-0a3d9bdef270' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-0d488b40-72fc-44a9-95f3-bbcff7400b6a' class='xr-var-data-in' type='checkbox'><label for='data-0d488b40-72fc-44a9-95f3-bbcff7400b6a' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl 
class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([1, 2])</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>latitude</span></div><div class='xr-var-dims'>(node)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-8e08f675-2058-4253-9c91-8eec7b1bd420' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-8e08f675-2058-4253-9c91-8eec7b1bd420' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-07dd40bb-ace2-43b0-b718-365cf4230ceb' class='xr-var-data-in' type='checkbox'><label for='data-07dd40bb-ace2-43b0-b718-365cf4230ceb' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>long_name :</span></dt><dd>latitude</dd><dt><span>positive :</span></dt><dd>north</dd><dt><span>standard_name :</span></dt><dd>latitude</dd><dt><span>units :</span></dt><dd>degrees_north</dd></dl></div><div class='xr-var-data'><pre>[8303 values with dtype=float64]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>longitude</span></div><div class='xr-var-dims'>(node)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-7753e223-77b3-413d-80c8-83fb384e3208' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-7753e223-77b3-413d-80c8-83fb384e3208' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-0b969d86-b447-43d3-9a34-882b75555d09' class='xr-var-data-in' type='checkbox'><label for='data-0b969d86-b447-43d3-9a34-882b75555d09' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>long_name 
:</span></dt><dd>longitude</dd><dt><span>positive :</span></dt><dd>east</dd><dt><span>standard_name :</span></dt><dd>longitude</dd><dt><span>units :</span></dt><dd>degrees_east</dd></dl></div><div class='xr-var-data'><pre>[8303 values with dtype=float64]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>time</span></div><div class='xr-var-dims'>(time)</div><div class='xr-var-dtype'>datetime64[ns]</div><div class='xr-var-preview xr-preview'>2020-04-28T01:20:00 ... 2020-05-12</div><input id='attrs-b97f8526-ccba-4234-8eff-30d352fd341a' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-b97f8526-ccba-4234-8eff-30d352fd341a' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-a3f59635-905f-4eb2-9e9b-43f3a15d9d04' class='xr-var-data-in' type='checkbox'><label for='data-a3f59635-905f-4eb2-9e9b-43f3a15d9d04' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>base_date :</span></dt><dd>2020-04-28 00:00:00</dd><dt><span>long_name :</span></dt><dd>model time</dd><dt><span>standard_name :</span></dt><dd>time</dd></dl></div><div class='xr-var-data'><pre>array([&#x27;2020-04-28T01:20:00.000000000&#x27;, &#x27;2020-04-28T02:40:00.000000000&#x27;,\n",
" &#x27;2020-04-28T04:00:00.000000000&#x27;, ..., &#x27;2020-05-11T21:20:00.000000000&#x27;,\n",
" &#x27;2020-05-11T22:40:00.000000000&#x27;, &#x27;2020-05-12T00:00:00.000000000&#x27;],\n",
" dtype=&#x27;datetime64[ns]&#x27;)</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-178762da-7ecd-4b6c-aaee-adad26f1f658' class='xr-section-summary-in' type='checkbox' checked><label for='section-178762da-7ecd-4b6c-aaee-adad26f1f658' class='xr-section-summary' >Data variables: <span>(7)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span>air_pressure_at_sea_level</span></div><div class='xr-var-dims'>(append_dim, time, node)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-123fe86c-bfc1-43f6-96c9-5501befaaf9d' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-123fe86c-bfc1-43f6-96c9-5501befaaf9d' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-68501ff4-df49-4062-9ae5-0f43894fe2a9' class='xr-var-data-in' type='checkbox'><label for='data-68501ff4-df49-4062-9ae5-0f43894fe2a9' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>location :</span></dt><dd>node</dd><dt><span>long_name :</span></dt><dd>air pressure at sea level</dd><dt><span>mesh :</span></dt><dd>adcirc_mesh</dd><dt><span>standard_name :</span></dt><dd>air_pressure_at_sea_level</dd><dt><span>units :</span></dt><dd>meters of water</dd></dl></div><div class='xr-var-data'><pre>[4184712 values with dtype=float64]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>eastward_water_velocity</span></div><div class='xr-var-dims'>(append_dim, time, node)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-baeac7c5-ec8a-4787-b7c9-7d5735731db5' class='xr-var-attrs-in' type='checkbox' ><label 
for='attrs-baeac7c5-ec8a-4787-b7c9-7d5735731db5' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-b9857e30-ba46-4cdb-8f17-67ff90659ea5' class='xr-var-data-in' type='checkbox'><label for='data-b9857e30-ba46-4cdb-8f17-67ff90659ea5' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>dry_Value :</span></dt><dd>-99999.0</dd><dt><span>location :</span></dt><dd>node</dd><dt><span>long_name :</span></dt><dd>water column vertically averaged east/west velocity</dd><dt><span>mesh :</span></dt><dd>adcirc_mesh</dd><dt><span>positive :</span></dt><dd>east</dd><dt><span>standard_name :</span></dt><dd>eastward_water_velocity</dd><dt><span>units :</span></dt><dd>m s-1</dd></dl></div><div class='xr-var-data'><pre>[4184712 values with dtype=float64]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>eastward_wind</span></div><div class='xr-var-dims'>(append_dim, time, node)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-12717cd3-d3e1-49aa-b514-049cb1a2e3de' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-12717cd3-d3e1-49aa-b514-049cb1a2e3de' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-05bbeff3-e095-4d4c-b69e-be2ff221453e' class='xr-var-data-in' type='checkbox'><label for='data-05bbeff3-e095-4d4c-b69e-be2ff221453e' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>location :</span></dt><dd>node</dd><dt><span>long_name :</span></dt><dd>e/w wind velocity</dd><dt><span>mesh :</span></dt><dd>adcirc_mesh</dd><dt><span>positive :</span></dt><dd>east</dd><dt><span>standard_name 
:</span></dt><dd>eastward_wind</dd><dt><span>units :</span></dt><dd>m s-1</dd></dl></div><div class='xr-var-data'><pre>[4184712 values with dtype=float64]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>element</span></div><div class='xr-var-dims'>(append_dim, nele, nvertex)</div><div class='xr-var-dtype'>int32</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-5e7928c2-0179-4b1e-9e20-9bbe0c38954e' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-5e7928c2-0179-4b1e-9e20-9bbe0c38954e' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-aa241017-21f6-4a2a-8dd9-8318b74cd0e6' class='xr-var-data-in' type='checkbox'><label for='data-aa241017-21f6-4a2a-8dd9-8318b74cd0e6' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>cf_role :</span></dt><dd>face_node_connectivity</dd><dt><span>long_name :</span></dt><dd>element</dd><dt><span>start_index :</span></dt><dd>1</dd><dt><span>units :</span></dt><dd>nondimensional</dd></dl></div><div class='xr-var-data'><pre>[88566 values with dtype=int32]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>northward_water_velocity</span></div><div class='xr-var-dims'>(append_dim, time, node)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-b821f07f-9f82-404a-8561-b981e096cc1b' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-b821f07f-9f82-404a-8561-b981e096cc1b' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-39290a5e-841b-4e85-98e3-148f89062538' class='xr-var-data-in' type='checkbox'><label for='data-39290a5e-841b-4e85-98e3-148f89062538' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use 
xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>dry_Value :</span></dt><dd>-99999.0</dd><dt><span>location :</span></dt><dd>node</dd><dt><span>long_name :</span></dt><dd>water column vertically averaged north/south velocity</dd><dt><span>mesh :</span></dt><dd>adcirc_mesh</dd><dt><span>positive :</span></dt><dd>north</dd><dt><span>standard_name :</span></dt><dd>northward_water_velocity</dd><dt><span>units :</span></dt><dd>m s-1</dd></dl></div><div class='xr-var-data'><pre>[4184712 values with dtype=float64]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>northward_wind</span></div><div class='xr-var-dims'>(append_dim, time, node)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-02da6b7d-bd43-4051-a101-6a0db92f5bf5' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-02da6b7d-bd43-4051-a101-6a0db92f5bf5' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-8a01107a-0797-4c28-ba1f-cc65e8429105' class='xr-var-data-in' type='checkbox'><label for='data-8a01107a-0797-4c28-ba1f-cc65e8429105' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>location :</span></dt><dd>node</dd><dt><span>long_name :</span></dt><dd>n/s wind velocity</dd><dt><span>mesh :</span></dt><dd>adcirc_mesh</dd><dt><span>positive :</span></dt><dd>north</dd><dt><span>standard_name :</span></dt><dd>northward_wind</dd><dt><span>units :</span></dt><dd>m s-1</dd></dl></div><div class='xr-var-data'><pre>[4184712 values with dtype=float64]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>sea_surface_height_above_geoid</span></div><div class='xr-var-dims'>(append_dim, time, node)</div><div class='xr-var-dtype'>float64</div><div 
class='xr-var-preview xr-preview'>...</div><input id='attrs-a77f9ab4-f607-4495-b17c-f95f3fb00209' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-a77f9ab4-f607-4495-b17c-f95f3fb00209' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-6250cde3-bc1e-4dca-9285-6ffeff53e470' class='xr-var-data-in' type='checkbox'><label for='data-6250cde3-bc1e-4dca-9285-6ffeff53e470' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>location :</span></dt><dd>node</dd><dt><span>long_name :</span></dt><dd>water surface elevation above geoid</dd><dt><span>mesh :</span></dt><dd>adcirc_mesh</dd><dt><span>standard_name :</span></dt><dd>sea_surface_height_above_geoid</dd><dt><span>units :</span></dt><dd>m</dd></dl></div><div class='xr-var-data'><pre>[4184712 values with dtype=float64]</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-bc9b9fd4-c116-49b8-b738-35f75ae35a09' class='xr-section-summary-in' type='checkbox' ><label for='section-bc9b9fd4-c116-49b8-b738-35f75ae35a09' class='xr-section-summary' >Indexes: <span>(2)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-index-name'><div>append_dim</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-cf985960-05ab-46fd-b51c-88c6ad6efd18' class='xr-index-data-in' type='checkbox'/><label for='index-cf985960-05ab-46fd-b51c-88c6ad6efd18' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Index([1, 2], dtype=&#x27;int64&#x27;, name=&#x27;append_dim&#x27;))</pre></div></li><li class='xr-var-item'><div class='xr-index-name'><div>time</div></div><div 
class='xr-index-preview'>PandasIndex</div><div></div><input id='index-07791521-fdd4-42ba-a08a-257716231f7e' class='xr-index-data-in' type='checkbox'/><label for='index-07791521-fdd4-42ba-a08a-257716231f7e' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(DatetimeIndex([&#x27;2020-04-28 01:20:00&#x27;, &#x27;2020-04-28 02:40:00&#x27;,\n",
" &#x27;2020-04-28 04:00:00&#x27;, &#x27;2020-04-28 05:20:00&#x27;,\n",
" &#x27;2020-04-28 06:40:00&#x27;, &#x27;2020-04-28 08:00:00&#x27;,\n",
" &#x27;2020-04-28 09:20:00&#x27;, &#x27;2020-04-28 10:40:00&#x27;,\n",
" &#x27;2020-04-28 12:00:00&#x27;, &#x27;2020-04-28 13:20:00&#x27;,\n",
" ...\n",
" &#x27;2020-05-11 12:00:00&#x27;, &#x27;2020-05-11 13:20:00&#x27;,\n",
" &#x27;2020-05-11 14:40:00&#x27;, &#x27;2020-05-11 16:00:00&#x27;,\n",
" &#x27;2020-05-11 17:20:00&#x27;, &#x27;2020-05-11 18:40:00&#x27;,\n",
" &#x27;2020-05-11 20:00:00&#x27;, &#x27;2020-05-11 21:20:00&#x27;,\n",
" &#x27;2020-05-11 22:40:00&#x27;, &#x27;2020-05-12 00:00:00&#x27;],\n",
" dtype=&#x27;datetime64[ns]&#x27;, name=&#x27;time&#x27;, length=252, freq=None))</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-a703a272-9b47-465f-8089-3aa407498686' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-a703a272-9b47-465f-8089-3aa407498686' class='xr-section-summary' title='Expand/collapse section'>Attributes: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'></dl></div></li></ul></div></div>"
],
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (append_dim: 2, time: 252, node: 8303,\n",
" nele: 14761, nvertex: 3)\n",
"Coordinates:\n",
" * append_dim (append_dim) int64 1 2\n",
" latitude (node) float64 ...\n",
" longitude (node) float64 ...\n",
" * time (time) datetime64[ns] 2020-04-28T01:20:00...\n",
"Dimensions without coordinates: node, nele, nvertex\n",
"Data variables:\n",
" air_pressure_at_sea_level (append_dim, time, node) float64 ...\n",
" eastward_water_velocity (append_dim, time, node) float64 ...\n",
" eastward_wind (append_dim, time, node) float64 ...\n",
" element (append_dim, nele, nvertex) int32 ...\n",
" northward_water_velocity (append_dim, time, node) float64 ...\n",
" northward_wind (append_dim, time, node) float64 ...\n",
" sea_surface_height_above_geoid (append_dim, time, node) float64 ..."
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds_json"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ca725334-d767-4af2-a512-d8f1fbd48741",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n",
"<defs>\n",
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n",
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"</symbol>\n",
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n",
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"</symbol>\n",
"</defs>\n",
"</svg>\n",
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n",
" *\n",
" */\n",
"\n",
":root {\n",
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n",
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n",
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n",
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n",
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n",
" --xr-background-color: var(--jp-layout-color0, white);\n",
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n",
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n",
"}\n",
"\n",
"html[theme=dark],\n",
"body[data-theme=dark],\n",
"body.vscode-dark {\n",
" --xr-font-color0: rgba(255, 255, 255, 1);\n",
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n",
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n",
" --xr-border-color: #1F1F1F;\n",
" --xr-disabled-color: #515151;\n",
" --xr-background-color: #111111;\n",
" --xr-background-color-row-even: #111111;\n",
" --xr-background-color-row-odd: #313131;\n",
"}\n",
"\n",
".xr-wrap {\n",
" display: block !important;\n",
" min-width: 300px;\n",
" max-width: 700px;\n",
"}\n",
"\n",
".xr-text-repr-fallback {\n",
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n",
" display: none;\n",
"}\n",
"\n",
".xr-header {\n",
" padding-top: 6px;\n",
" padding-bottom: 6px;\n",
" margin-bottom: 4px;\n",
" border-bottom: solid 1px var(--xr-border-color);\n",
"}\n",
"\n",
".xr-header > div,\n",
".xr-header > ul {\n",
" display: inline;\n",
" margin-top: 0;\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-obj-type,\n",
".xr-array-name {\n",
" margin-left: 2px;\n",
" margin-right: 10px;\n",
"}\n",
"\n",
".xr-obj-type {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-sections {\n",
" padding-left: 0 !important;\n",
" display: grid;\n",
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n",
"}\n",
"\n",
".xr-section-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-section-item input {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-item input + label {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label {\n",
" cursor: pointer;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label:hover {\n",
" color: var(--xr-font-color0);\n",
"}\n",
"\n",
".xr-section-summary {\n",
" grid-column: 1;\n",
" color: var(--xr-font-color2);\n",
" font-weight: 500;\n",
"}\n",
"\n",
".xr-section-summary > span {\n",
" display: inline-block;\n",
" padding-left: 0.5em;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-summary-in + label:before {\n",
" display: inline-block;\n",
" content: '►';\n",
" font-size: 11px;\n",
" width: 15px;\n",
" text-align: center;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label:before {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label:before {\n",
" content: '▼';\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label > span {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-summary,\n",
".xr-section-inline-details {\n",
" padding-top: 4px;\n",
" padding-bottom: 4px;\n",
"}\n",
"\n",
".xr-section-inline-details {\n",
" grid-column: 2 / -1;\n",
"}\n",
"\n",
".xr-section-details {\n",
" display: none;\n",
" grid-column: 1 / -1;\n",
" margin-bottom: 5px;\n",
"}\n",
"\n",
".xr-section-summary-in:checked ~ .xr-section-details {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-array-wrap {\n",
" grid-column: 1 / -1;\n",
" display: grid;\n",
" grid-template-columns: 20px auto;\n",
"}\n",
"\n",
".xr-array-wrap > label {\n",
" grid-column: 1;\n",
" vertical-align: top;\n",
"}\n",
"\n",
".xr-preview {\n",
" color: var(--xr-font-color3);\n",
"}\n",
"\n",
".xr-array-preview,\n",
".xr-array-data {\n",
" padding: 0 5px !important;\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-array-data,\n",
".xr-array-in:checked ~ .xr-array-preview {\n",
" display: none;\n",
"}\n",
"\n",
".xr-array-in:checked ~ .xr-array-data,\n",
".xr-array-preview {\n",
" display: inline-block;\n",
"}\n",
"\n",
".xr-dim-list {\n",
" display: inline-block !important;\n",
" list-style: none;\n",
" padding: 0 !important;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list li {\n",
" display: inline-block;\n",
" padding: 0;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list:before {\n",
" content: '(';\n",
"}\n",
"\n",
".xr-dim-list:after {\n",
" content: ')';\n",
"}\n",
"\n",
".xr-dim-list li:not(:last-child):after {\n",
" content: ',';\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-has-index {\n",
" font-weight: bold;\n",
"}\n",
"\n",
".xr-var-list,\n",
".xr-var-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-var-item > div,\n",
".xr-var-item label,\n",
".xr-var-item > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-even);\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-var-item > .xr-var-name:hover span {\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-var-list > li:nth-child(odd) > div,\n",
".xr-var-list > li:nth-child(odd) > label,\n",
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-odd);\n",
"}\n",
"\n",
".xr-var-name {\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-var-dims {\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-var-dtype {\n",
" grid-column: 3;\n",
" text-align: right;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-preview {\n",
" grid-column: 4;\n",
"}\n",
"\n",
".xr-index-preview {\n",
" grid-column: 2 / 5;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-name,\n",
".xr-var-dims,\n",
".xr-var-dtype,\n",
".xr-preview,\n",
".xr-attrs dt {\n",
" white-space: nowrap;\n",
" overflow: hidden;\n",
" text-overflow: ellipsis;\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-var-name:hover,\n",
".xr-var-dims:hover,\n",
".xr-var-dtype:hover,\n",
".xr-attrs dt:hover {\n",
" overflow: visible;\n",
" width: auto;\n",
" z-index: 1;\n",
"}\n",
"\n",
".xr-var-attrs,\n",
".xr-var-data,\n",
".xr-index-data {\n",
" display: none;\n",
" background-color: var(--xr-background-color) !important;\n",
" padding-bottom: 5px !important;\n",
"}\n",
"\n",
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n",
".xr-var-data-in:checked ~ .xr-var-data,\n",
".xr-index-data-in:checked ~ .xr-index-data {\n",
" display: block;\n",
"}\n",
"\n",
".xr-var-data > table {\n",
" float: right;\n",
"}\n",
"\n",
".xr-var-name span,\n",
".xr-var-data,\n",
".xr-index-name div,\n",
".xr-index-data,\n",
".xr-attrs {\n",
" padding-left: 25px !important;\n",
"}\n",
"\n",
".xr-attrs,\n",
".xr-var-attrs,\n",
".xr-var-data,\n",
".xr-index-data {\n",
" grid-column: 1 / -1;\n",
"}\n",
"\n",
"dl.xr-attrs {\n",
" padding: 0;\n",
" margin: 0;\n",
" display: grid;\n",
" grid-template-columns: 125px auto;\n",
"}\n",
"\n",
".xr-attrs dt,\n",
".xr-attrs dd {\n",
" padding: 0;\n",
" margin: 0;\n",
" float: left;\n",
" padding-right: 10px;\n",
" width: auto;\n",
"}\n",
"\n",
".xr-attrs dt {\n",
" font-weight: normal;\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-attrs dt:hover span {\n",
" display: inline-block;\n",
" background: var(--xr-background-color);\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-attrs dd {\n",
" grid-column: 2;\n",
" white-space: pre-wrap;\n",
" word-break: break-all;\n",
"}\n",
"\n",
".xr-icon-database,\n",
".xr-icon-file-text2,\n",
".xr-no-icon {\n",
" display: inline-block;\n",
" vertical-align: middle;\n",
" width: 1em;\n",
" height: 1.5em !important;\n",
" stroke-width: 0;\n",
" stroke: currentColor;\n",
" fill: currentColor;\n",
"}\n",
"</style><pre class='xr-text-repr-fallback'>&lt;xarray.DataArray &#x27;sea_surface_height_above_geoid&#x27; (append_dim: 2, time: 252,\n",
" node: 8303)&gt;\n",
"[4184712 values with dtype=float64]\n",
"Coordinates:\n",
" * append_dim (append_dim) int64 1 2\n",
" latitude (node) float64 ...\n",
" longitude (node) float64 ...\n",
" * time (time) datetime64[ns] 2020-04-28T01:20:00 ... 2020-05-12\n",
"Dimensions without coordinates: node\n",
"Attributes:\n",
" location: node\n",
" long_name: water surface elevation above geoid\n",
" mesh: adcirc_mesh\n",
" standard_name: sea_surface_height_above_geoid\n",
" units: m</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.DataArray</div><div class='xr-array-name'>'sea_surface_height_above_geoid'</div><ul class='xr-dim-list'><li><span class='xr-has-index'>append_dim</span>: 2</li><li><span class='xr-has-index'>time</span>: 252</li><li><span>node</span>: 8303</li></ul></div><ul class='xr-sections'><li class='xr-section-item'><div class='xr-array-wrap'><input id='section-4a1f87aa-1cab-4a1a-ab2f-a927bdad70af' class='xr-array-in' type='checkbox' checked><label for='section-4a1f87aa-1cab-4a1a-ab2f-a927bdad70af' title='Show/hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-array-preview xr-preview'><span>...</span></div><div class='xr-array-data'><pre>[4184712 values with dtype=float64]</pre></div></div></li><li class='xr-section-item'><input id='section-9be5fbe1-f283-4a5a-87d6-07c2d943a5de' class='xr-section-summary-in' type='checkbox' checked><label for='section-9be5fbe1-f283-4a5a-87d6-07c2d943a5de' class='xr-section-summary' >Coordinates: <span>(4)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>append_dim</span></div><div class='xr-var-dims'>(append_dim)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>1 2</div><input id='attrs-ddf3205b-d2b5-4b19-9a08-76000264b40f' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-ddf3205b-d2b5-4b19-9a08-76000264b40f' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-95e382f9-0ae8-48f3-87f3-47f79e600f3e' class='xr-var-data-in' type='checkbox'><label for='data-95e382f9-0ae8-48f3-87f3-47f79e600f3e' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use 
xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([1, 2])</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>latitude</span></div><div class='xr-var-dims'>(node)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-65a93dd3-a688-4d30-a38c-810b1d3a35c3' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-65a93dd3-a688-4d30-a38c-810b1d3a35c3' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-8c5dc921-6024-4248-9a72-ec79a45056c2' class='xr-var-data-in' type='checkbox'><label for='data-8c5dc921-6024-4248-9a72-ec79a45056c2' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>long_name :</span></dt><dd>latitude</dd><dt><span>positive :</span></dt><dd>north</dd><dt><span>standard_name :</span></dt><dd>latitude</dd><dt><span>units :</span></dt><dd>degrees_north</dd></dl></div><div class='xr-var-data'><pre>[8303 values with dtype=float64]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>longitude</span></div><div class='xr-var-dims'>(node)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-778bf1b8-1537-413c-b230-78e2b649511b' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-778bf1b8-1537-413c-b230-78e2b649511b' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-0071bf97-b683-4b87-8ae5-0351fcb866a1' class='xr-var-data-in' type='checkbox'><label for='data-0071bf97-b683-4b87-8ae5-0351fcb866a1' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div 
class='xr-var-attrs'><dl class='xr-attrs'><dt><span>long_name :</span></dt><dd>longitude</dd><dt><span>positive :</span></dt><dd>east</dd><dt><span>standard_name :</span></dt><dd>longitude</dd><dt><span>units :</span></dt><dd>degrees_east</dd></dl></div><div class='xr-var-data'><pre>[8303 values with dtype=float64]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>time</span></div><div class='xr-var-dims'>(time)</div><div class='xr-var-dtype'>datetime64[ns]</div><div class='xr-var-preview xr-preview'>2020-04-28T01:20:00 ... 2020-05-12</div><input id='attrs-27520ec7-2c50-4520-9873-671951a203e5' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-27520ec7-2c50-4520-9873-671951a203e5' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-65d21136-8110-4b25-9554-429cb0c8131d' class='xr-var-data-in' type='checkbox'><label for='data-65d21136-8110-4b25-9554-429cb0c8131d' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>base_date :</span></dt><dd>2020-04-28 00:00:00</dd><dt><span>long_name :</span></dt><dd>model time</dd><dt><span>standard_name :</span></dt><dd>time</dd></dl></div><div class='xr-var-data'><pre>array([&#x27;2020-04-28T01:20:00.000000000&#x27;, &#x27;2020-04-28T02:40:00.000000000&#x27;,\n",
" &#x27;2020-04-28T04:00:00.000000000&#x27;, ..., &#x27;2020-05-11T21:20:00.000000000&#x27;,\n",
" &#x27;2020-05-11T22:40:00.000000000&#x27;, &#x27;2020-05-12T00:00:00.000000000&#x27;],\n",
" dtype=&#x27;datetime64[ns]&#x27;)</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-5677bbc4-2863-4155-bbb8-122f4bdb5268' class='xr-section-summary-in' type='checkbox' ><label for='section-5677bbc4-2863-4155-bbb8-122f4bdb5268' class='xr-section-summary' >Indexes: <span>(2)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-index-name'><div>append_dim</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-ed358e7b-f052-40a7-8003-b8d65b56eb9b' class='xr-index-data-in' type='checkbox'/><label for='index-ed358e7b-f052-40a7-8003-b8d65b56eb9b' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Index([1, 2], dtype=&#x27;int64&#x27;, name=&#x27;append_dim&#x27;))</pre></div></li><li class='xr-var-item'><div class='xr-index-name'><div>time</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-c1711afb-4cbe-47a9-ab7a-ddaf4b3927a3' class='xr-index-data-in' type='checkbox'/><label for='index-c1711afb-4cbe-47a9-ab7a-ddaf4b3927a3' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(DatetimeIndex([&#x27;2020-04-28 01:20:00&#x27;, &#x27;2020-04-28 02:40:00&#x27;,\n",
" &#x27;2020-04-28 04:00:00&#x27;, &#x27;2020-04-28 05:20:00&#x27;,\n",
" &#x27;2020-04-28 06:40:00&#x27;, &#x27;2020-04-28 08:00:00&#x27;,\n",
" &#x27;2020-04-28 09:20:00&#x27;, &#x27;2020-04-28 10:40:00&#x27;,\n",
" &#x27;2020-04-28 12:00:00&#x27;, &#x27;2020-04-28 13:20:00&#x27;,\n",
" ...\n",
" &#x27;2020-05-11 12:00:00&#x27;, &#x27;2020-05-11 13:20:00&#x27;,\n",
" &#x27;2020-05-11 14:40:00&#x27;, &#x27;2020-05-11 16:00:00&#x27;,\n",
" &#x27;2020-05-11 17:20:00&#x27;, &#x27;2020-05-11 18:40:00&#x27;,\n",
" &#x27;2020-05-11 20:00:00&#x27;, &#x27;2020-05-11 21:20:00&#x27;,\n",
" &#x27;2020-05-11 22:40:00&#x27;, &#x27;2020-05-12 00:00:00&#x27;],\n",
" dtype=&#x27;datetime64[ns]&#x27;, name=&#x27;time&#x27;, length=252, freq=None))</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-88173818-e394-442f-8ba5-a9bcd4796cfc' class='xr-section-summary-in' type='checkbox' checked><label for='section-88173818-e394-442f-8ba5-a9bcd4796cfc' class='xr-section-summary' >Attributes: <span>(5)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'><dt><span>location :</span></dt><dd>node</dd><dt><span>long_name :</span></dt><dd>water surface elevation above geoid</dd><dt><span>mesh :</span></dt><dd>adcirc_mesh</dd><dt><span>standard_name :</span></dt><dd>sea_surface_height_above_geoid</dd><dt><span>units :</span></dt><dd>m</dd></dl></div></li></ul></div></div>"
],
"text/plain": [
"<xarray.DataArray 'sea_surface_height_above_geoid' (append_dim: 2, time: 252,\n",
" node: 8303)>\n",
"[4184712 values with dtype=float64]\n",
"Coordinates:\n",
" * append_dim (append_dim) int64 1 2\n",
" latitude (node) float64 ...\n",
" longitude (node) float64 ...\n",
" * time (time) datetime64[ns] 2020-04-28T01:20:00 ... 2020-05-12\n",
"Dimensions without coordinates: node\n",
"Attributes:\n",
" location: node\n",
" long_name: water surface elevation above geoid\n",
" mesh: adcirc_mesh\n",
" standard_name: sea_surface_height_above_geoid\n",
" units: m"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds_json[\"sea_surface_height_above_geoid\"]"
]
},
{
"cell_type": "markdown",
"id": "4f20b27a-0104-445c-8978-9f550433cdc3",
"metadata": {},
"source": [
"Now let's try writing the combined references to parquet"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "64621436-58a5-4af7-b81b-73b6ca3385d9",
"metadata": {},
"outputs": [
{
"ename": "ArrayNotFoundError",
"evalue": "array not found at path %r' 'air_pressure_at_sea_level'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/zarr/storage.py:1428\u001b[0m, in \u001b[0;36mFSStore.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1427\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1428\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 1429\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/fsspec/mapping.py:151\u001b[0m, in \u001b[0;36mFSMap.__getitem__\u001b[0;34m(self, key, default)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 151\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcat\u001b[49m\u001b[43m(\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmissing_exceptions:\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/fsspec/implementations/reference.py:801\u001b[0m, in \u001b[0;36mReferenceFileSystem.cat\u001b[0;34m(self, path, recursive, on_error, **kwargs)\u001b[0m\n\u001b[1;32m 800\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m proto, paths \u001b[38;5;129;01min\u001b[39;00m proto_dict\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 801\u001b[0m fs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfss\u001b[49m\u001b[43m[\u001b[49m\u001b[43mproto\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 802\u001b[0m urls, starts, ends, valid_paths \u001b[38;5;241m=\u001b[39m [], [], [], []\n",
"\u001b[0;31mKeyError\u001b[0m: 'file'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/zarr/core.py:252\u001b[0m, in \u001b[0;36mArray._load_metadata_nosync\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 251\u001b[0m mkey \u001b[38;5;241m=\u001b[39m _prefix_to_array_key(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_key_prefix)\n\u001b[0;32m--> 252\u001b[0m meta_bytes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_store\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmkey\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/zarr/storage.py:1430\u001b[0m, in \u001b[0;36mFSStore.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1429\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m-> 1430\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n",
"\u001b[0;31mKeyError\u001b[0m: 'air_pressure_at_sea_level/.zarray'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mArrayNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[8], line 12\u001b[0m\n\u001b[1;32m 3\u001b[0m fs \u001b[38;5;241m=\u001b[39m fsspec\u001b[38;5;241m.\u001b[39mfilesystem(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfile\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m output_mapper \u001b[38;5;241m=\u001b[39m LazyReferenceMapper\u001b[38;5;241m.\u001b[39mcreate(root\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mkerchunked.parq\u001b[39m\u001b[38;5;124m\"\u001b[39m, fs\u001b[38;5;241m=\u001b[39mfs, record_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100_000\u001b[39m)\n\u001b[1;32m 6\u001b[0m mzz \u001b[38;5;241m=\u001b[39m \u001b[43mMultiZarrToZarr\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mzarr_refs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mremote_protocol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoo_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mappend_dim\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mnew_dims\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_dims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mappend_dim\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43midentical_dims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtime\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnode\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnele\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlatitude\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlongitude\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m---> 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_mapper\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranslate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m output_mapper\u001b[38;5;241m.\u001b[39mflush()\n\u001b[1;32m 15\u001b[0m df \u001b[38;5;241m=\u001b[39m refs_to_dataframe(output_dict, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mkerchunked.parq\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/kerchunk/combine.py:500\u001b[0m, in \u001b[0;36mMultiZarrToZarr.translate\u001b[0;34m(self, filename, storage_options)\u001b[0m\n\u001b[1;32m 498\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstore_coords()\n\u001b[1;32m 499\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;241m3\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdone:\n\u001b[0;32m--> 500\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msecond_pass\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;241m4\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdone:\n\u001b[1;32m 502\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpostprocess \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/kerchunk/combine.py:359\u001b[0m, in \u001b[0;36mMultiZarrToZarr.second_pass\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 355\u001b[0m z \u001b[38;5;241m=\u001b[39m zarr\u001b[38;5;241m.\u001b[39mopen(m)\n\u001b[1;32m 357\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m no_deps \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 358\u001b[0m \u001b[38;5;66;03m# done first time only\u001b[39;00m\n\u001b[0;32m--> 359\u001b[0m deps \u001b[38;5;241m=\u001b[39m [\u001b[43mz\u001b[49m\u001b[43m[\u001b[49m\u001b[43m_\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mattrs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_ARRAY_DIMENSIONS\u001b[39m\u001b[38;5;124m\"\u001b[39m, []) \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m z]\n\u001b[1;32m 360\u001b[0m all_deps \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(\u001b[38;5;28msum\u001b[39m(deps, []))\n\u001b[1;32m 361\u001b[0m no_deps \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcoo_map) \u001b[38;5;241m-\u001b[39m all_deps\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/zarr/hierarchy.py:462\u001b[0m, in \u001b[0;36mGroup.__getitem__\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 460\u001b[0m path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_item_path(item)\n\u001b[1;32m 461\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m contains_array(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store, path):\n\u001b[0;32m--> 462\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mArray\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 463\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_store\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 464\u001b[0m \u001b[43m \u001b[49m\u001b[43mread_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 465\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 466\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunk_store\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_chunk_store\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 467\u001b[0m \u001b[43m \u001b[49m\u001b[43msynchronizer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_synchronizer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 469\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mzarr_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 470\u001b[0m \u001b[43m \u001b[49m\u001b[43mmeta_array\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_meta_array\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 471\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 472\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m contains_group(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store, path, explicit_only\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[1;32m 473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Group(\n\u001b[1;32m 474\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store,\n\u001b[1;32m 475\u001b[0m read_only\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_read_only,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 481\u001b[0m meta_array\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_meta_array,\n\u001b[1;32m 482\u001b[0m )\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/zarr/core.py:224\u001b[0m, in \u001b[0;36mArray.__init__\u001b[0;34m(self, store, path, read_only, chunk_store, synchronizer, cache_metadata, cache_attrs, partial_decompress, write_empty_chunks, zarr_version, meta_array)\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_metadata_key_suffix \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_hierarchy_metadata[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmetadata_key_suffix\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 223\u001b[0m \u001b[38;5;66;03m# initialize metadata\u001b[39;00m\n\u001b[0;32m--> 224\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_load_metadata\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# initialize attributes\u001b[39;00m\n\u001b[1;32m 227\u001b[0m akey \u001b[38;5;241m=\u001b[39m _prefix_to_attrs_key(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_key_prefix)\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/zarr/core.py:243\u001b[0m, in \u001b[0;36mArray._load_metadata\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"(Re)load metadata from store.\"\"\"\u001b[39;00m\n\u001b[1;32m 242\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_synchronizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 243\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_load_metadata_nosync\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 244\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 245\u001b[0m mkey \u001b[38;5;241m=\u001b[39m _prefix_to_array_key(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_key_prefix)\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/zarr/core.py:254\u001b[0m, in \u001b[0;36mArray._load_metadata_nosync\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 252\u001b[0m meta_bytes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store[mkey]\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[0;32m--> 254\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ArrayNotFoundError(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_path)\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 256\u001b[0m \n\u001b[1;32m 257\u001b[0m \u001b[38;5;66;03m# decode and store metadata as instance members\u001b[39;00m\n\u001b[1;32m 258\u001b[0m meta \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store\u001b[38;5;241m.\u001b[39m_metadata_class\u001b[38;5;241m.\u001b[39mdecode_array_metadata(meta_bytes)\n",
"\u001b[0;31mArrayNotFoundError\u001b[0m: array not found at path %r' 'air_pressure_at_sea_level'"
]
}
],
"source": [
"os.makedirs(\"kerchunked.parq\", exist_ok=True)\n",
"\n",
"fs = fsspec.filesystem(\"file\")\n",
"output_mapper = LazyReferenceMapper.create(root=\"kerchunked.parq\", fs=fs, record_size=100_000)\n",
"\n",
"mzz = MultiZarrToZarr(\n",
" zarr_refs,\n",
" remote_protocol=\"memory\",\n",
" coo_map={\"append_dim\": new_dims}, \n",
" concat_dims=[\"append_dim\"],\n",
" identical_dims=[\"time\", \"node\", \"nele\", \"latitude\", \"longitude\"],\n",
" out=output_mapper).translate()\n",
"output_mapper.flush()\n",
"\n",
"df = refs_to_dataframe(output_dict, \"kerchunked.parq\")"
]
},
{
"cell_type": "markdown",
"id": "f0bdd055-6c97-4ee5-a71e-46f1795ee809",
"metadata": {},
"source": [
"Try the same thing, but with `remote_protocol=\"file\"` instead of `\"memory\"`:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "b6cf8027-0c9f-4669-b3c1-ee295423976f",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "Expected string or C-contiguous bytes-like object",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[10], line 12\u001b[0m\n\u001b[1;32m 3\u001b[0m fs \u001b[38;5;241m=\u001b[39m fsspec\u001b[38;5;241m.\u001b[39mfilesystem(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfile\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m output_mapper \u001b[38;5;241m=\u001b[39m LazyReferenceMapper\u001b[38;5;241m.\u001b[39mcreate(root\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mkerchunked.parq\u001b[39m\u001b[38;5;124m\"\u001b[39m, fs\u001b[38;5;241m=\u001b[39mfs, record_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100_000\u001b[39m)\n\u001b[1;32m 6\u001b[0m mzz \u001b[38;5;241m=\u001b[39m \u001b[43mMultiZarrToZarr\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mzarr_refs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mremote_protocol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoo_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mappend_dim\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mnew_dims\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcat_dims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mappend_dim\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43midentical_dims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtime\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnode\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnele\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlatitude\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlongitude\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m---> 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_mapper\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranslate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m output_mapper\u001b[38;5;241m.\u001b[39mflush()\n\u001b[1;32m 15\u001b[0m df \u001b[38;5;241m=\u001b[39m refs_to_dataframe(output_dict, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mkerchunked.parq\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/kerchunk/combine.py:500\u001b[0m, in \u001b[0;36mMultiZarrToZarr.translate\u001b[0;34m(self, filename, storage_options)\u001b[0m\n\u001b[1;32m 498\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstore_coords()\n\u001b[1;32m 499\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;241m3\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdone:\n\u001b[0;32m--> 500\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msecond_pass\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;241m4\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdone:\n\u001b[1;32m 502\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpostprocess \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/kerchunk/combine.py:395\u001b[0m, in \u001b[0;36mMultiZarrToZarr.second_pass\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m fs\u001b[38;5;241m.\u001b[39mls(v, detail\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m 394\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m k\u001b[38;5;241m.\u001b[39mstartswith(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mv\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 395\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mout\u001b[49m\u001b[43m[\u001b[49m\u001b[43mk\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m fs\u001b[38;5;241m.\u001b[39mreferences[k]\n\u001b[1;32m 396\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 397\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSecond pass: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, i, v)\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/fsspec/implementations/reference.py:360\u001b[0m, in \u001b[0;36mLazyReferenceMapper.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 357\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 358\u001b[0m \u001b[38;5;66;03m# metadata or top-level\u001b[39;00m\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_items[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[0;32m--> 360\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mzmetadata[key] \u001b[38;5;241m=\u001b[39m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 361\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mbytes\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\n\u001b[1;32m 362\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mTypeError\u001b[0m: Expected string or C-contiguous bytes-like object"
]
}
],
"source": [
"os.makedirs(\"kerchunked.parq\", exist_ok=True)\n",
"\n",
"fs = fsspec.filesystem(\"file\")\n",
"output_mapper = LazyReferenceMapper.create(root=\"kerchunked.parq\", fs=fs, record_size=100_000)\n",
"\n",
"mzz = MultiZarrToZarr(\n",
" zarr_refs,\n",
" remote_protocol=\"file\",\n",
" coo_map={\"append_dim\": new_dims}, \n",
" concat_dims=[\"append_dim\"],\n",
" identical_dims=[\"time\", \"node\", \"nele\", \"latitude\", \"longitude\"],\n",
" out=output_mapper).translate()\n",
"output_mapper.flush()\n",
"\n",
"df = refs_to_dataframe(output_dict, \"kerchunked.parq\")"
]
},
{
"cell_type": "markdown",
"id": "a813b3d6-3d07-4257-9743-df9b435b52ee",
"metadata": {},
"source": [
"What if we just convert the merged JSON references (`kerchunked.json`) to parquet with `refs_to_dataframe`?"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7a90cd02-ebdd-4734-8a8d-287527ec1e8f",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "LazyReferenceMapper.create() got multiple values for argument 'root'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[11], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mkerchunked.json\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m ff:\n\u001b[1;32m 2\u001b[0m merged_json_refs \u001b[38;5;241m=\u001b[39m ujson\u001b[38;5;241m.\u001b[39mload(ff)\n\u001b[0;32m----> 4\u001b[0m \u001b[43mrefs_to_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmerged_json_refs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mkerchunked.parq\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/kerchunk/df.py:154\u001b[0m, in \u001b[0;36mrefs_to_dataframe\u001b[0;34m(fo, url, target_protocol, target_options, storage_options, record_size, categorical_threshold)\u001b[0m\n\u001b[1;32m 151\u001b[0m refs \u001b[38;5;241m=\u001b[39m refs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrefs\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 153\u001b[0m fs, _ \u001b[38;5;241m=\u001b[39m fsspec\u001b[38;5;241m.\u001b[39mcore\u001b[38;5;241m.\u001b[39murl_to_fs(url, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(storage_options \u001b[38;5;129;01mor\u001b[39;00m {}))\n\u001b[0;32m--> 154\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mLazyReferenceMapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 155\u001b[0m \u001b[43m \u001b[49m\u001b[43mrecord_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroot\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcategorical_threshold\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcategorical_threshold\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(refs):\n\u001b[1;32m 159\u001b[0m out[k] \u001b[38;5;241m=\u001b[39m refs[k]\n",
"\u001b[0;31mTypeError\u001b[0m: LazyReferenceMapper.create() got multiple values for argument 'root'"
]
}
],
"source": [
"with open(\"kerchunked.json\", 'rb') as ff:\n",
" merged_json_refs = ujson.load(ff)\n",
"\n",
"refs_to_dataframe(merged_json_refs, \"kerchunked.parq\")"
]
},
{
"cell_type": "markdown",
"id": "472dceec-b4b9-4cc2-9056-f6a44e934c11",
"metadata": {},
"source": [
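"This appears to be a bug in `refs_to_dataframe`: it passes `record_size` as the first positional argument to `LazyReferenceMapper.create`, where it is bound to the `root` parameter, and then also passes `root=url` by keyword, hence \"got multiple values for argument 'root'\"."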
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b5bd61b0-6d40-4c02-8c9b-54ca981452a6",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "LazyReferenceMapper.create() got multiple values for argument 'root'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrefs_to_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerged_json_refs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mkerchunked.parq\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrecord_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m100_000\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/kerchunk/df.py:154\u001b[0m, in \u001b[0;36mrefs_to_dataframe\u001b[0;34m(fo, url, target_protocol, target_options, storage_options, record_size, categorical_threshold)\u001b[0m\n\u001b[1;32m 151\u001b[0m refs \u001b[38;5;241m=\u001b[39m refs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrefs\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 153\u001b[0m fs, _ \u001b[38;5;241m=\u001b[39m fsspec\u001b[38;5;241m.\u001b[39mcore\u001b[38;5;241m.\u001b[39murl_to_fs(url, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(storage_options \u001b[38;5;129;01mor\u001b[39;00m {}))\n\u001b[0;32m--> 154\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mLazyReferenceMapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 155\u001b[0m \u001b[43m \u001b[49m\u001b[43mrecord_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroot\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcategorical_threshold\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcategorical_threshold\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(refs):\n\u001b[1;32m 159\u001b[0m out[k] \u001b[38;5;241m=\u001b[39m refs[k]\n",
"\u001b[0;31mTypeError\u001b[0m: LazyReferenceMapper.create() got multiple values for argument 'root'"
]
}
],
"source": [
"refs_to_dataframe(fo=merged_json_refs, url=\"kerchunked.parq\", record_size=100_000)"
]
},
{
"cell_type": "markdown",
"id": "3ef3e716-758c-451f-83e7-2af806db9e58",
"metadata": {},
"source": [
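"The same error occurs when the arguments are passed explicitly by keyword (cell above), so the problem is inside `refs_to_dataframe`, not in how it is called."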
]
},
{
"cell_type": "markdown",
"id": "43018d5a-7d14-470d-940d-0bbb948be911",
"metadata": {},
"source": [
"Under the hood we go: copy `refs_to_dataframe` in kerchunk's `df.py` to a new function named `refs_to_dataframe_2`, and make the following change in the copy:\n",
"\n",
"```python3\n",
" out = LazyReferenceMapper.create(\n",
" record_size, root=url, fs=fs, categorical_threshold=categorical_threshold\n",
" )\n",
"```\n",
"\n",
"becomes:\n",
"\n",
"```python3\n",
" out = LazyReferenceMapper.create(\n",
" record_size=record_size, root=url, fs=fs, categorical_threshold=categorical_threshold\n",
" )\n",
"```\n"
]
},
{
"cell_type": "markdown",
"id": "5b2f1ea4-a3f8-4c94-9f1c-140213db6fc5",
"metadata": {},
"source": [
"Now, let's try that again. (Warning: the cell below may not be reproducible for outside viewers, because it relies on the locally patched `refs_to_dataframe_2`.)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "0389a248-56ec-4ff4-b4a0-42cc45705192",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "Expected string or C-contiguous bytes-like object",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[13], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mkerchunk\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdf\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m refs_to_dataframe_2\n\u001b[0;32m----> 2\u001b[0m \u001b[43mrefs_to_dataframe_2\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmerged_json_refs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mkerchunked.parq\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/kerchunk/df.py:220\u001b[0m, in \u001b[0;36mrefs_to_dataframe_2\u001b[0;34m(fo, url, target_protocol, target_options, storage_options, record_size, categorical_threshold)\u001b[0m\n\u001b[1;32m 215\u001b[0m out \u001b[38;5;241m=\u001b[39m LazyReferenceMapper\u001b[38;5;241m.\u001b[39mcreate(\n\u001b[1;32m 216\u001b[0m record_size\u001b[38;5;241m=\u001b[39mrecord_size, root\u001b[38;5;241m=\u001b[39murl, fs\u001b[38;5;241m=\u001b[39mfs, categorical_threshold\u001b[38;5;241m=\u001b[39mcategorical_threshold\n\u001b[1;32m 217\u001b[0m )\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(refs):\n\u001b[0;32m--> 220\u001b[0m \u001b[43mout\u001b[49m\u001b[43m[\u001b[49m\u001b[43mk\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m refs[k]\n\u001b[1;32m 221\u001b[0m out\u001b[38;5;241m.\u001b[39mflush()\n",
"File \u001b[0;32m~/miniconda3/envs/model2zarr/lib/python3.12/site-packages/fsspec/implementations/reference.py:360\u001b[0m, in \u001b[0;36mLazyReferenceMapper.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 357\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 358\u001b[0m \u001b[38;5;66;03m# metadata or top-level\u001b[39;00m\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_items[key] \u001b[38;5;241m=\u001b[39m value\n\u001b[0;32m--> 360\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mzmetadata[key] \u001b[38;5;241m=\u001b[39m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 361\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mbytes\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\n\u001b[1;32m 362\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mTypeError\u001b[0m: Expected string or C-contiguous bytes-like object"
]
}
],
"source": [
"from kerchunk.df import refs_to_dataframe_2\n",
"refs_to_dataframe_2(merged_json_refs, \"kerchunked.parq\")"
]
},
{
"cell_type": "markdown",
"id": "b659f59d-0b24-4fe6-b187-b8b9334dfd42",
"metadata": {},
"source": [
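"Back to the `TypeError: Expected string or C-contiguous bytes-like object` raised in `LazyReferenceMapper.__setitem__` that we hit earlier, so the failure is at least consistent."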
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7c7b5e4-756d-4639-9063-536cd3912833",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment