Skip to content

Instantly share code, notes, and snippets.

@aidanheerdegen
Created January 17, 2022 03:51
Show Gist options
  • Save aidanheerdegen/86435493875597b8d2bb5e2c5fc12556 to your computer and use it in GitHub Desktop.
Save aidanheerdegen/86435493875597b8d2bb5e2c5fc12556 to your computer and use it in GitHub Desktop.
Concatenate data files without correct time dimensions
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e6cd8fcb-b91d-4f22-a905-0f85ed919b99",
"metadata": {},
"outputs": [],
"source": [
"import xarray as xr\n",
"from pathlib import Path\n",
"from dateutil import parser\n",
"import datetime"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4ac285d5-7165-42fa-a9ef-df2340fc0b15",
"metadata": {},
"outputs": [],
"source": [
"def open_ETH(file, origin='1960-01-01', timechunk=50):\n",
"    \"\"\"Open a netCDF file from ETH, alter the time metadata based on filename and return an xarray.Dataset\n",
"\n",
"    The time dimension stored in the file is dropped and replaced with a length-one time\n",
"    coordinate whose value is the date and hour parsed from the file name, expressed as\n",
"    days since `origin`. The dataset is returned undecoded; call `xr.decode_cf` on it to\n",
"    see the time coordinate as datetime stamps.\n",
"\n",
"    file      : path (pathlib.Path or str) of an ETH data file, named like C20190130_01\n",
"    origin    : origin date for time coordinate\n",
"    timechunk : chunk size along the time dimension, recorded in the encoding of every\n",
"                chunked data variable so it can be used when the data is saved\n",
"\n",
"    Raises ValueError if the file name does not match the pattern C%Y%m%d_%H\n",
"    \"\"\"\n",
"    # Accept plain strings as well as Path objects, the file name is needed below\n",
"    file = Path(file)\n",
"\n",
"    # Open the data file, don't bother decoding, and immediately drop the time dimension\n",
"    ds = xr.open_dataset(file, engine='netcdf4', decode_cf=False).squeeze(dim='time', drop=True)\n",
"\n",
"    # Parse the model time from the file name\n",
"    modeltime = datetime.datetime.strptime(file.name, \"C%Y%m%d_%H\")\n",
"\n",
"    # Calculate the offset from the origin to the modeltime in (fractional) days\n",
"    days = (modeltime - parser.parse(origin)) / datetime.timedelta(days=1)\n",
"\n",
"    # Create a new time coordinate\n",
"    time = xr.DataArray([days], dims=['time'], coords={'time': [days]})\n",
"\n",
"    # Add time coordinate back into data and set time units\n",
"    ds = ds.expand_dims(time=time)\n",
"    ds.time.attrs['units'] = f\"days since {origin}\"\n",
"\n",
"    # Set the chunking along the time dimension (first index) to timechunk\n",
"    # for all data variables\n",
"    for v in ds.data_vars:\n",
"        chunks = ds[v].encoding.get('chunksizes')\n",
"        if chunks is None:\n",
"            # No chunk sizes recorded (e.g. the variable is stored contiguously),\n",
"            # so there is nothing to adjust\n",
"            continue\n",
"        ds[v].encoding['chunksizes'] = (timechunk, *chunks[1:])\n",
"\n",
"    # Return dataset\n",
"    return ds"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c80cf1d6-8640-42f3-8ddd-2c744d5023e2",
"metadata": {},
"outputs": [],
"source": [
"def concat_ETH(dir, pattern='*'):\n",
"    \"\"\"\n",
"    Open all ETH data files in a specified directory and concatenate along the time axis\n",
"\n",
"    dir     : directory (str or pathlib.Path) containing the ETH data files\n",
"    pattern : glob pattern selecting the data files inside dir, by default every file\n",
"\n",
"    Returns the concatenated xarray.Dataset, with the source directory stored in the\n",
"    'source_directory' attribute\n",
"    \"\"\"\n",
"    # Convert to a Path once. A Path object never keeps a trailing slash, regardless of\n",
"    # how the directory was specified, so the stored attribute can safely have a suffix\n",
"    # appended to it later\n",
"    source = Path(dir)\n",
"\n",
"    # Collect the matching files in sorted name order, skipping any sub-directories as\n",
"    # they cannot be opened as data files\n",
"    files = sorted(f for f in source.glob(pattern) if f.is_file())\n",
"\n",
"    # Use a list comprehension to open all the files in an array, and use that as input\n",
"    # to the xarray.concat function\n",
"    ds = xr.concat([open_ETH(f) for f in files], dim='time')\n",
"\n",
"    # Save source directory as attribute to be used later\n",
"    ds.attrs['source_directory'] = str(source)\n",
"\n",
"    return ds"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b0cf834f-7678-4c72-b24d-254e42a03277",
"metadata": {},
"outputs": [],
"source": [
"def save_ETH(ds, outpath=None):\n",
"    \"\"\"\n",
"    Save concatenated dataset to a netCDF file. This is just a wrapper around the to_netcdf\n",
"    method, but does some manipulation of the encoding information so that it is saved with\n",
"    the proper chunking and infers an output path if one is not provided\n",
"\n",
"    ds      : xarray.Dataset to save, it must contain a 'time' variable\n",
"    outpath : path of the output file. If None the 'source_directory' attribute of ds\n",
"              with a `.nc` suffix appended is used\n",
"\n",
"    Raises KeyError if ds has no 'time' variable, or if outpath is None and ds has no\n",
"    'source_directory' attribute\n",
"    \"\"\"\n",
"    # Create an encoding dictionary from copies of the .encoding attributes of all the\n",
"    # variables. Copies are used so that neither ds.encoding nor the encoding of the\n",
"    # individual variables is modified by saving\n",
"    enc = {v: dict(ds[v].encoding) for v in ds.variables}\n",
"\n",
"    # Make sure the new time coordinate is not chunked. Chunk sizes cannot be combined\n",
"    # with contiguous storage, so drop them if present\n",
"    enc['time']['contiguous'] = True\n",
"    enc['time'].pop('chunksizes', None)\n",
"\n",
"    if outpath is None:\n",
"        # Set output path to be the name of the concatenated directory with `.nc` suffix\n",
"        outpath = ds.attrs['source_directory'] + '.nc'\n",
"\n",
"    # An explicit empty unlimited_dims removes any unlimited dimensions, as it takes\n",
"    # precedence over an 'unlimited_dims' entry in ds.encoding\n",
"    ds.to_netcdf(outpath, encoding=enc, unlimited_dims=[])"
]
},
{
"cell_type": "markdown",
"id": "5a3b1f53-b21a-4b45-b15c-e9a3798cbede",
"metadata": {},
"source": [
"It is possible to open a single datafile"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "87b886a6-8144-4fd7-937c-c196ce49e60c",
"metadata": {},
"outputs": [],
"source": [
"ds = open_ETH(Path('/g/data/v45/aph502/helpdesk/02272-Chenhui/ETH/clim.mincl/cyc/201901/C20190130_01'))"
]
},
{
"cell_type": "markdown",
"id": "d73cd4fc-469b-48c0-9379-3f47c0c80f78",
"metadata": {},
"source": [
"The contents can be checked. Because the returned dataset is not decoded by default, call `xr.decode_cf` so the time coordinate is displayed in a human-readable way with `datetime` stamps."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "93ef4924-559b-41f3-a495-67152321d7c5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n",
"<defs>\n",
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n",
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"</symbol>\n",
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n",
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"</symbol>\n",
"</defs>\n",
"</svg>\n",
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n",
" *\n",
" */\n",
"\n",
":root {\n",
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n",
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n",
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n",
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n",
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n",
" --xr-background-color: var(--jp-layout-color0, white);\n",
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n",
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n",
"}\n",
"\n",
"html[theme=dark],\n",
"body.vscode-dark {\n",
" --xr-font-color0: rgba(255, 255, 255, 1);\n",
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n",
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n",
" --xr-border-color: #1F1F1F;\n",
" --xr-disabled-color: #515151;\n",
" --xr-background-color: #111111;\n",
" --xr-background-color-row-even: #111111;\n",
" --xr-background-color-row-odd: #313131;\n",
"}\n",
"\n",
".xr-wrap {\n",
" display: block !important;\n",
" min-width: 300px;\n",
" max-width: 700px;\n",
"}\n",
"\n",
".xr-text-repr-fallback {\n",
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n",
" display: none;\n",
"}\n",
"\n",
".xr-header {\n",
" padding-top: 6px;\n",
" padding-bottom: 6px;\n",
" margin-bottom: 4px;\n",
" border-bottom: solid 1px var(--xr-border-color);\n",
"}\n",
"\n",
".xr-header > div,\n",
".xr-header > ul {\n",
" display: inline;\n",
" margin-top: 0;\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-obj-type,\n",
".xr-array-name {\n",
" margin-left: 2px;\n",
" margin-right: 10px;\n",
"}\n",
"\n",
".xr-obj-type {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-sections {\n",
" padding-left: 0 !important;\n",
" display: grid;\n",
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n",
"}\n",
"\n",
".xr-section-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-section-item input {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-item input + label {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label {\n",
" cursor: pointer;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label:hover {\n",
" color: var(--xr-font-color0);\n",
"}\n",
"\n",
".xr-section-summary {\n",
" grid-column: 1;\n",
" color: var(--xr-font-color2);\n",
" font-weight: 500;\n",
"}\n",
"\n",
".xr-section-summary > span {\n",
" display: inline-block;\n",
" padding-left: 0.5em;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-summary-in + label:before {\n",
" display: inline-block;\n",
" content: '►';\n",
" font-size: 11px;\n",
" width: 15px;\n",
" text-align: center;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label:before {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label:before {\n",
" content: '▼';\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label > span {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-summary,\n",
".xr-section-inline-details {\n",
" padding-top: 4px;\n",
" padding-bottom: 4px;\n",
"}\n",
"\n",
".xr-section-inline-details {\n",
" grid-column: 2 / -1;\n",
"}\n",
"\n",
".xr-section-details {\n",
" display: none;\n",
" grid-column: 1 / -1;\n",
" margin-bottom: 5px;\n",
"}\n",
"\n",
".xr-section-summary-in:checked ~ .xr-section-details {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-array-wrap {\n",
" grid-column: 1 / -1;\n",
" display: grid;\n",
" grid-template-columns: 20px auto;\n",
"}\n",
"\n",
".xr-array-wrap > label {\n",
" grid-column: 1;\n",
" vertical-align: top;\n",
"}\n",
"\n",
".xr-preview {\n",
" color: var(--xr-font-color3);\n",
"}\n",
"\n",
".xr-array-preview,\n",
".xr-array-data {\n",
" padding: 0 5px !important;\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-array-data,\n",
".xr-array-in:checked ~ .xr-array-preview {\n",
" display: none;\n",
"}\n",
"\n",
".xr-array-in:checked ~ .xr-array-data,\n",
".xr-array-preview {\n",
" display: inline-block;\n",
"}\n",
"\n",
".xr-dim-list {\n",
" display: inline-block !important;\n",
" list-style: none;\n",
" padding: 0 !important;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list li {\n",
" display: inline-block;\n",
" padding: 0;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list:before {\n",
" content: '(';\n",
"}\n",
"\n",
".xr-dim-list:after {\n",
" content: ')';\n",
"}\n",
"\n",
".xr-dim-list li:not(:last-child):after {\n",
" content: ',';\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-has-index {\n",
" font-weight: bold;\n",
"}\n",
"\n",
".xr-var-list,\n",
".xr-var-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-var-item > div,\n",
".xr-var-item label,\n",
".xr-var-item > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-even);\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-var-item > .xr-var-name:hover span {\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-var-list > li:nth-child(odd) > div,\n",
".xr-var-list > li:nth-child(odd) > label,\n",
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-odd);\n",
"}\n",
"\n",
".xr-var-name {\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-var-dims {\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-var-dtype {\n",
" grid-column: 3;\n",
" text-align: right;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-preview {\n",
" grid-column: 4;\n",
"}\n",
"\n",
".xr-var-name,\n",
".xr-var-dims,\n",
".xr-var-dtype,\n",
".xr-preview,\n",
".xr-attrs dt {\n",
" white-space: nowrap;\n",
" overflow: hidden;\n",
" text-overflow: ellipsis;\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-var-name:hover,\n",
".xr-var-dims:hover,\n",
".xr-var-dtype:hover,\n",
".xr-attrs dt:hover {\n",
" overflow: visible;\n",
" width: auto;\n",
" z-index: 1;\n",
"}\n",
"\n",
".xr-var-attrs,\n",
".xr-var-data {\n",
" display: none;\n",
" background-color: var(--xr-background-color) !important;\n",
" padding-bottom: 5px !important;\n",
"}\n",
"\n",
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n",
".xr-var-data-in:checked ~ .xr-var-data {\n",
" display: block;\n",
"}\n",
"\n",
".xr-var-data > table {\n",
" float: right;\n",
"}\n",
"\n",
".xr-var-name span,\n",
".xr-var-data,\n",
".xr-attrs {\n",
" padding-left: 25px !important;\n",
"}\n",
"\n",
".xr-attrs,\n",
".xr-var-attrs,\n",
".xr-var-data {\n",
" grid-column: 1 / -1;\n",
"}\n",
"\n",
"dl.xr-attrs {\n",
" padding: 0;\n",
" margin: 0;\n",
" display: grid;\n",
" grid-template-columns: 125px auto;\n",
"}\n",
"\n",
".xr-attrs dt,\n",
".xr-attrs dd {\n",
" padding: 0;\n",
" margin: 0;\n",
" float: left;\n",
" padding-right: 10px;\n",
" width: auto;\n",
"}\n",
"\n",
".xr-attrs dt {\n",
" font-weight: normal;\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-attrs dt:hover span {\n",
" display: inline-block;\n",
" background: var(--xr-background-color);\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-attrs dd {\n",
" grid-column: 2;\n",
" white-space: pre-wrap;\n",
" word-break: break-all;\n",
"}\n",
"\n",
".xr-icon-database,\n",
".xr-icon-file-text2 {\n",
" display: inline-block;\n",
" vertical-align: middle;\n",
" width: 1em;\n",
" height: 1.5em !important;\n",
" stroke-width: 0;\n",
" stroke: currentColor;\n",
" fill: currentColor;\n",
"}\n",
"</style><pre class='xr-text-repr-fallback'>&lt;xarray.Dataset&gt;\n",
"Dimensions: (time: 1, lon: 160, lat: 121, dimz_INPUT: 1)\n",
"Coordinates:\n",
" * time (time) datetime64[ns] 2019-01-30T01:00:00\n",
" * lon (lon) float64 100.0 100.5 101.0 101.5 ... 178.0 178.5 179.0 179.5\n",
" * lat (lat) float64 -60.0 -59.5 -59.0 -58.5 -58.0 ... -1.5 -1.0 -0.5 0.0\n",
"Dimensions without coordinates: dimz_INPUT\n",
"Data variables:\n",
" INPUT (time, dimz_INPUT, lat, lon) float32 ...\n",
" LABEL (time, dimz_INPUT, lat, lon) float32 ...\n",
" AGE (time, dimz_INPUT, lat, lon) float32 ...\n",
" LIFETIME (time, dimz_INPUT, lat, lon) float32 ...\n",
" PMIN (time, dimz_INPUT, lat, lon) float32 ...\n",
"Attributes: (12/13)\n",
" CDI: Climate Data Interface version 1.9.6 (http://mpimet...\n",
" history: Mon Oct 11 22:47:52 2021: cdo -f nc4c -z zip sellon...\n",
" Conventions: CF-1.6\n",
" domxmin: -180.0\n",
" domxmax: 179.5\n",
" domymin: -90.0\n",
" ... ...\n",
" domzmin: 1050.0\n",
" domzmax: 1050.0\n",
" domamin: 0.0\n",
" domamax: 0.0\n",
" constants_file_name: C20190130_01_cst\n",
" CDO: Climate Data Operators version 1.9.6 (http://mpimet...</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.Dataset</div></div><ul class='xr-sections'><li class='xr-section-item'><input id='section-8f8a0a7c-6635-43ad-ac81-149d58e6adfb' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-8f8a0a7c-6635-43ad-ac81-149d58e6adfb' class='xr-section-summary' title='Expand/collapse section'>Dimensions:</label><div class='xr-section-inline-details'><ul class='xr-dim-list'><li><span class='xr-has-index'>time</span>: 1</li><li><span class='xr-has-index'>lon</span>: 160</li><li><span class='xr-has-index'>lat</span>: 121</li><li><span>dimz_INPUT</span>: 1</li></ul></div><div class='xr-section-details'></div></li><li class='xr-section-item'><input id='section-ea9c5a45-107a-4d83-b79d-a324c6c12f68' class='xr-section-summary-in' type='checkbox' checked><label for='section-ea9c5a45-107a-4d83-b79d-a324c6c12f68' class='xr-section-summary' >Coordinates: <span>(3)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>time</span></div><div class='xr-var-dims'>(time)</div><div class='xr-var-dtype'>datetime64[ns]</div><div class='xr-var-preview xr-preview'>2019-01-30T01:00:00</div><input id='attrs-ab2e4cd1-535b-4be3-bd01-e2eec7cad04b' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-ab2e4cd1-535b-4be3-bd01-e2eec7cad04b' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-4ae7fdd6-8faf-4996-9dc8-738f04ea9c16' class='xr-var-data-in' type='checkbox'><label for='data-4ae7fdd6-8faf-4996-9dc8-738f04ea9c16' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div 
class='xr-var-data'><pre>array([&#x27;2019-01-30T01:00:00.000000000&#x27;], dtype=&#x27;datetime64[ns]&#x27;)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>lon</span></div><div class='xr-var-dims'>(lon)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>100.0 100.5 101.0 ... 179.0 179.5</div><input id='attrs-80299452-55de-4137-b112-7ceddd042f3e' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-80299452-55de-4137-b112-7ceddd042f3e' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-29f23b2f-8409-4e2d-baf6-91291ae9fef8' class='xr-var-data-in' type='checkbox'><label for='data-29f23b2f-8409-4e2d-baf6-91291ae9fef8' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>standard_name :</span></dt><dd>longitude</dd><dt><span>long_name :</span></dt><dd>longitude</dd><dt><span>units :</span></dt><dd>degrees_east</dd><dt><span>axis :</span></dt><dd>X</dd></dl></div><div class='xr-var-data'><pre>array([100. , 100.5, 101. , 101.5, 102. , 102.5, 103. , 103.5, 104. , 104.5,\n",
" 105. , 105.5, 106. , 106.5, 107. , 107.5, 108. , 108.5, 109. , 109.5,\n",
" 110. , 110.5, 111. , 111.5, 112. , 112.5, 113. , 113.5, 114. , 114.5,\n",
" 115. , 115.5, 116. , 116.5, 117. , 117.5, 118. , 118.5, 119. , 119.5,\n",
" 120. , 120.5, 121. , 121.5, 122. , 122.5, 123. , 123.5, 124. , 124.5,\n",
" 125. , 125.5, 126. , 126.5, 127. , 127.5, 128. , 128.5, 129. , 129.5,\n",
" 130. , 130.5, 131. , 131.5, 132. , 132.5, 133. , 133.5, 134. , 134.5,\n",
" 135. , 135.5, 136. , 136.5, 137. , 137.5, 138. , 138.5, 139. , 139.5,\n",
" 140. , 140.5, 141. , 141.5, 142. , 142.5, 143. , 143.5, 144. , 144.5,\n",
" 145. , 145.5, 146. , 146.5, 147. , 147.5, 148. , 148.5, 149. , 149.5,\n",
" 150. , 150.5, 151. , 151.5, 152. , 152.5, 153. , 153.5, 154. , 154.5,\n",
" 155. , 155.5, 156. , 156.5, 157. , 157.5, 158. , 158.5, 159. , 159.5,\n",
" 160. , 160.5, 161. , 161.5, 162. , 162.5, 163. , 163.5, 164. , 164.5,\n",
" 165. , 165.5, 166. , 166.5, 167. , 167.5, 168. , 168.5, 169. , 169.5,\n",
" 170. , 170.5, 171. , 171.5, 172. , 172.5, 173. , 173.5, 174. , 174.5,\n",
" 175. , 175.5, 176. , 176.5, 177. , 177.5, 178. , 178.5, 179. , 179.5])</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>lat</span></div><div class='xr-var-dims'>(lat)</div><div class='xr-var-dtype'>float64</div><div class='xr-var-preview xr-preview'>-60.0 -59.5 -59.0 ... -1.0 -0.5 0.0</div><input id='attrs-629a162c-a72e-408f-a717-85376692f0eb' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-629a162c-a72e-408f-a717-85376692f0eb' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-9abd1e3a-318c-40b2-8cf9-e04ba0abd3e6' class='xr-var-data-in' type='checkbox'><label for='data-9abd1e3a-318c-40b2-8cf9-e04ba0abd3e6' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>standard_name :</span></dt><dd>latitude</dd><dt><span>long_name :</span></dt><dd>latitude</dd><dt><span>units :</span></dt><dd>degrees_north</dd><dt><span>axis :</span></dt><dd>Y</dd></dl></div><div class='xr-var-data'><pre>array([-60. , -59.5, -59. , -58.5, -58. , -57.5, -57. , -56.5, -56. , -55.5,\n",
" -55. , -54.5, -54. , -53.5, -53. , -52.5, -52. , -51.5, -51. , -50.5,\n",
" -50. , -49.5, -49. , -48.5, -48. , -47.5, -47. , -46.5, -46. , -45.5,\n",
" -45. , -44.5, -44. , -43.5, -43. , -42.5, -42. , -41.5, -41. , -40.5,\n",
" -40. , -39.5, -39. , -38.5, -38. , -37.5, -37. , -36.5, -36. , -35.5,\n",
" -35. , -34.5, -34. , -33.5, -33. , -32.5, -32. , -31.5, -31. , -30.5,\n",
" -30. , -29.5, -29. , -28.5, -28. , -27.5, -27. , -26.5, -26. , -25.5,\n",
" -25. , -24.5, -24. , -23.5, -23. , -22.5, -22. , -21.5, -21. , -20.5,\n",
" -20. , -19.5, -19. , -18.5, -18. , -17.5, -17. , -16.5, -16. , -15.5,\n",
" -15. , -14.5, -14. , -13.5, -13. , -12.5, -12. , -11.5, -11. , -10.5,\n",
" -10. , -9.5, -9. , -8.5, -8. , -7.5, -7. , -6.5, -6. , -5.5,\n",
" -5. , -4.5, -4. , -3.5, -3. , -2.5, -2. , -1.5, -1. , -0.5,\n",
" 0. ])</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-24717b6b-c62b-49c1-a43b-0c7b854df123' class='xr-section-summary-in' type='checkbox' checked><label for='section-24717b6b-c62b-49c1-a43b-0c7b854df123' class='xr-section-summary' >Data variables: <span>(5)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span>INPUT</span></div><div class='xr-var-dims'>(time, dimz_INPUT, lat, lon)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-04f09f38-128b-4945-b500-274d26ca899b' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-04f09f38-128b-4945-b500-274d26ca899b' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-18bd2234-09b9-4773-8730-eb4abf309da6' class='xr-var-data-in' type='checkbox'><label for='data-18bd2234-09b9-4773-8730-eb4abf309da6' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>xmin :</span></dt><dd>-180.0</dd><dt><span>xmax :</span></dt><dd>179.5</dd><dt><span>xstag :</span></dt><dd>0.0</dd><dt><span>ymin :</span></dt><dd>-90.0</dd><dt><span>ymax :</span></dt><dd>90.0</dd><dt><span>ystag :</span></dt><dd>0.0</dd><dt><span>zmin :</span></dt><dd>1050.0</dd><dt><span>zmax :</span></dt><dd>1050.0</dd><dt><span>zstag :</span></dt><dd>0.0</dd><dt><span>missing_data :</span></dt><dd>-999.99</dd></dl></div><div class='xr-var-data'><pre>array([[[[0., ..., 0.],\n",
" ...,\n",
" [0., ..., 0.]]]], dtype=float32)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>LABEL</span></div><div class='xr-var-dims'>(time, dimz_INPUT, lat, lon)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-a0dd0f60-0526-414d-aaed-5c8380923073' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-a0dd0f60-0526-414d-aaed-5c8380923073' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-30070db2-866b-471c-840e-f72e5eb84702' class='xr-var-data-in' type='checkbox'><label for='data-30070db2-866b-471c-840e-f72e5eb84702' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>xmin :</span></dt><dd>-180.0</dd><dt><span>xmax :</span></dt><dd>179.5</dd><dt><span>xstag :</span></dt><dd>0.0</dd><dt><span>ymin :</span></dt><dd>-90.0</dd><dt><span>ymax :</span></dt><dd>90.0</dd><dt><span>ystag :</span></dt><dd>0.0</dd><dt><span>zmin :</span></dt><dd>1050.0</dd><dt><span>zmax :</span></dt><dd>1050.0</dd><dt><span>zstag :</span></dt><dd>0.0</dd><dt><span>missing_data :</span></dt><dd>-999.99</dd></dl></div><div class='xr-var-data'><pre>array([[[[0., ..., 0.],\n",
" ...,\n",
" [0., ..., 0.]]]], dtype=float32)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>AGE</span></div><div class='xr-var-dims'>(time, dimz_INPUT, lat, lon)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-7647834f-0f8e-4e42-88d9-3eabbe18437f' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-7647834f-0f8e-4e42-88d9-3eabbe18437f' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-630b31c7-0458-4d65-bdb5-d091a5ddead1' class='xr-var-data-in' type='checkbox'><label for='data-630b31c7-0458-4d65-bdb5-d091a5ddead1' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>xmin :</span></dt><dd>-180.0</dd><dt><span>xmax :</span></dt><dd>179.5</dd><dt><span>xstag :</span></dt><dd>0.0</dd><dt><span>ymin :</span></dt><dd>-90.0</dd><dt><span>ymax :</span></dt><dd>90.0</dd><dt><span>ystag :</span></dt><dd>0.0</dd><dt><span>zmin :</span></dt><dd>1050.0</dd><dt><span>zmax :</span></dt><dd>1050.0</dd><dt><span>zstag :</span></dt><dd>0.0</dd><dt><span>missing_data :</span></dt><dd>-999.99</dd></dl></div><div class='xr-var-data'><pre>array([[[[0., ..., 0.],\n",
" ...,\n",
" [0., ..., 0.]]]], dtype=float32)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>LIFETIME</span></div><div class='xr-var-dims'>(time, dimz_INPUT, lat, lon)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-f981e7e8-4717-42fa-96b1-a448abbda1f7' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-f981e7e8-4717-42fa-96b1-a448abbda1f7' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-0749b351-b95e-42b6-a76e-0ba91e23c0a1' class='xr-var-data-in' type='checkbox'><label for='data-0749b351-b95e-42b6-a76e-0ba91e23c0a1' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>xmin :</span></dt><dd>-180.0</dd><dt><span>xmax :</span></dt><dd>179.5</dd><dt><span>xstag :</span></dt><dd>0.0</dd><dt><span>ymin :</span></dt><dd>-90.0</dd><dt><span>ymax :</span></dt><dd>90.0</dd><dt><span>ystag :</span></dt><dd>0.0</dd><dt><span>zmin :</span></dt><dd>1050.0</dd><dt><span>zmax :</span></dt><dd>1050.0</dd><dt><span>zstag :</span></dt><dd>0.0</dd><dt><span>missing_data :</span></dt><dd>-999.99</dd></dl></div><div class='xr-var-data'><pre>array([[[[0., ..., 0.],\n",
" ...,\n",
" [0., ..., 0.]]]], dtype=float32)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>PMIN</span></div><div class='xr-var-dims'>(time, dimz_INPUT, lat, lon)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-822cb01b-1a45-417a-bd72-1169284d9d63' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-822cb01b-1a45-417a-bd72-1169284d9d63' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-7b15af69-47fb-4774-83c0-e00b77c26471' class='xr-var-data-in' type='checkbox'><label for='data-7b15af69-47fb-4774-83c0-e00b77c26471' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>xmin :</span></dt><dd>-180.0</dd><dt><span>xmax :</span></dt><dd>179.5</dd><dt><span>xstag :</span></dt><dd>0.0</dd><dt><span>ymin :</span></dt><dd>-90.0</dd><dt><span>ymax :</span></dt><dd>90.0</dd><dt><span>ystag :</span></dt><dd>0.0</dd><dt><span>zmin :</span></dt><dd>1050.0</dd><dt><span>zmax :</span></dt><dd>1050.0</dd><dt><span>zstag :</span></dt><dd>0.0</dd><dt><span>missing_data :</span></dt><dd>-999.99</dd></dl></div><div class='xr-var-data'><pre>array([[[[0., ..., 0.],\n",
" ...,\n",
" [0., ..., 0.]]]], dtype=float32)</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-26ac0a70-ece1-4cf8-99ad-d31c5d3be2c4' class='xr-section-summary-in' type='checkbox' ><label for='section-26ac0a70-ece1-4cf8-99ad-d31c5d3be2c4' class='xr-section-summary' >Attributes: <span>(13)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'><dt><span>CDI :</span></dt><dd>Climate Data Interface version 1.9.6 (http://mpimet.mpg.de/cdi)</dd><dt><span>history :</span></dt><dd>Mon Oct 11 22:47:52 2021: cdo -f nc4c -z zip sellonlatbox,100,179.5,-60,0 TMP0 201901/C20190130_01\n",
"Thu Apr 22 19:54:48 2021: cdo setgrid,gridfile TMP1 C20190130_01</dd><dt><span>Conventions :</span></dt><dd>CF-1.6</dd><dt><span>domxmin :</span></dt><dd>-180.0</dd><dt><span>domxmax :</span></dt><dd>179.5</dd><dt><span>domymin :</span></dt><dd>-90.0</dd><dt><span>domymax :</span></dt><dd>90.0</dd><dt><span>domzmin :</span></dt><dd>1050.0</dd><dt><span>domzmax :</span></dt><dd>1050.0</dd><dt><span>domamin :</span></dt><dd>0.0</dd><dt><span>domamax :</span></dt><dd>0.0</dd><dt><span>constants_file_name :</span></dt><dd>C20190130_01_cst</dd><dt><span>CDO :</span></dt><dd>Climate Data Operators version 1.9.6 (http://mpimet.mpg.de/cdo)</dd></dl></div></li></ul></div></div>"
],
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (time: 1, lon: 160, lat: 121, dimz_INPUT: 1)\n",
"Coordinates:\n",
" * time (time) datetime64[ns] 2019-01-30T01:00:00\n",
" * lon (lon) float64 100.0 100.5 101.0 101.5 ... 178.0 178.5 179.0 179.5\n",
" * lat (lat) float64 -60.0 -59.5 -59.0 -58.5 -58.0 ... -1.5 -1.0 -0.5 0.0\n",
"Dimensions without coordinates: dimz_INPUT\n",
"Data variables:\n",
" INPUT (time, dimz_INPUT, lat, lon) float32 ...\n",
" LABEL (time, dimz_INPUT, lat, lon) float32 ...\n",
" AGE (time, dimz_INPUT, lat, lon) float32 ...\n",
" LIFETIME (time, dimz_INPUT, lat, lon) float32 ...\n",
" PMIN (time, dimz_INPUT, lat, lon) float32 ...\n",
"Attributes: (12/13)\n",
" CDI: Climate Data Interface version 1.9.6 (http://mpimet...\n",
" history: Mon Oct 11 22:47:52 2021: cdo -f nc4c -z zip sellon...\n",
" Conventions: CF-1.6\n",
" domxmin: -180.0\n",
" domxmax: 179.5\n",
" domymin: -90.0\n",
" ... ...\n",
" domzmin: 1050.0\n",
" domzmax: 1050.0\n",
" domamin: 0.0\n",
" domamax: 0.0\n",
" constants_file_name: C20190130_01_cst\n",
" CDO: Climate Data Operators version 1.9.6 (http://mpimet..."
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xr.decode_cf(ds)"
]
},
{
"cell_type": "markdown",
"id": "6db503e2-785c-4baf-862c-d825c467b536",
"metadata": {},
"source": [
"Test concatenating a directory of files"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f81e195d-5f09-4a61-8fef-60b325f57d81",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 16.1 s, sys: 1.22 s, total: 17.3 s\n",
"Wall time: 22.4 s\n"
]
}
],
"source": [
"%%time\n",
"ds = concat_ETH('/g/data/v45/aph502/helpdesk/02272-Chenhui/ETH/clim.mincl/cyc/201901')"
]
},
{
"cell_type": "markdown",
"id": "973cc46f-e4a9-4957-acb0-c1365d759d54",
"metadata": {},
"source": [
"Save to a file"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "14382753-ffba-409e-ac11-2a2088fca7b8",
"metadata": {},
"outputs": [],
"source": [
"save_ETH(ds)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "39e98929-e1b2-4d7c-aed7-9a7282bfb9b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-rw-r--r--. 1 aph502 v45 3.7M Jan 17 14:26 /g/data/v45/aph502/helpdesk/02272-Chenhui/ETH/clim.mincl/cyc/201901.nc\n"
]
}
],
"source": [
"!ls -lh '/g/data/v45/aph502/helpdesk/02272-Chenhui/ETH/clim.mincl/cyc/201901.nc'"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "dfef5cf2-31c0-401b-8186-c56743c1fd2d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"netcdf \\201901 {\n",
"dimensions:\n",
"\ttime = 744 ;\n",
"\tlon = 160 ;\n",
"\tlat = 121 ;\n",
"\tdimz_INPUT = 1 ;\n",
"variables:\n",
"\tdouble time(time) ;\n",
"\t\ttime:_FillValue = NaN ;\n",
"\t\ttime:units = \"days since 1960-01-01\" ;\n",
"\t\ttime:_Storage = \"contiguous\" ;\n",
"\t\ttime:_Endianness = \"little\" ;\n",
"\tdouble lon(lon) ;\n",
"\t\tlon:_FillValue = NaN ;\n",
"\t\tlon:standard_name = \"longitude\" ;\n",
"\t\tlon:long_name = \"longitude\" ;\n",
"\t\tlon:units = \"degrees_east\" ;\n",
"\t\tlon:axis = \"X\" ;\n",
"\t\tlon:_Storage = \"contiguous\" ;\n",
"\t\tlon:_Endianness = \"little\" ;\n",
"\tdouble lat(lat) ;\n",
"\t\tlat:_FillValue = NaN ;\n",
"\t\tlat:standard_name = \"latitude\" ;\n",
"\t\tlat:long_name = \"latitude\" ;\n",
"\t\tlat:units = \"degrees_north\" ;\n",
"\t\tlat:axis = \"Y\" ;\n",
"\t\tlat:_Storage = \"contiguous\" ;\n",
"\t\tlat:_Endianness = \"little\" ;\n",
"\tfloat INPUT(time, dimz_INPUT, lat, lon) ;\n",
"\t\tINPUT:_FillValue = NaNf ;\n",
"\t\tINPUT:xmin = -180.f ;\n",
"\t\tINPUT:xmax = 179.5f ;\n",
"\t\tINPUT:xstag = 0.f ;\n",
"\t\tINPUT:ymin = -90.f ;\n",
"\t\tINPUT:ymax = 90.f ;\n",
"\t\tINPUT:ystag = 0.f ;\n",
"\t\tINPUT:zmin = 1050.f ;\n",
"\t\tINPUT:zmax = 1050.f ;\n",
"\t\tINPUT:zstag = 0.f ;\n",
"\t\tINPUT:missing_data = -999.99f ;\n",
"\t\tINPUT:_Storage = \"chunked\" ;\n",
"\t\tINPUT:_ChunkSizes = 50, 1, 121, 160 ;\n",
"\t\tINPUT:_DeflateLevel = 1 ;\n",
"\t\tINPUT:_Shuffle = \"true\" ;\n",
"\t\tINPUT:_Endianness = \"little\" ;\n",
"\tfloat LABEL(time, dimz_INPUT, lat, lon) ;\n",
"\t\tLABEL:_FillValue = NaNf ;\n",
"\t\tLABEL:xmin = -180.f ;\n",
"\t\tLABEL:xmax = 179.5f ;\n",
"\t\tLABEL:xstag = 0.f ;\n",
"\t\tLABEL:ymin = -90.f ;\n",
"\t\tLABEL:ymax = 90.f ;\n",
"\t\tLABEL:ystag = 0.f ;\n",
"\t\tLABEL:zmin = 1050.f ;\n",
"\t\tLABEL:zmax = 1050.f ;\n",
"\t\tLABEL:zstag = 0.f ;\n",
"\t\tLABEL:missing_data = -999.99f ;\n",
"\t\tLABEL:_Storage = \"chunked\" ;\n",
"\t\tLABEL:_ChunkSizes = 50, 1, 121, 160 ;\n",
"\t\tLABEL:_DeflateLevel = 1 ;\n",
"\t\tLABEL:_Shuffle = \"true\" ;\n",
"\t\tLABEL:_Endianness = \"little\" ;\n",
"\tfloat AGE(time, dimz_INPUT, lat, lon) ;\n",
"\t\tAGE:_FillValue = NaNf ;\n",
"\t\tAGE:xmin = -180.f ;\n",
"\t\tAGE:xmax = 179.5f ;\n",
"\t\tAGE:xstag = 0.f ;\n",
"\t\tAGE:ymin = -90.f ;\n",
"\t\tAGE:ymax = 90.f ;\n",
"\t\tAGE:ystag = 0.f ;\n",
"\t\tAGE:zmin = 1050.f ;\n",
"\t\tAGE:zmax = 1050.f ;\n",
"\t\tAGE:zstag = 0.f ;\n",
"\t\tAGE:missing_data = -999.99f ;\n",
"\t\tAGE:_Storage = \"chunked\" ;\n",
"\t\tAGE:_ChunkSizes = 50, 1, 121, 160 ;\n",
"\t\tAGE:_DeflateLevel = 1 ;\n",
"\t\tAGE:_Shuffle = \"true\" ;\n",
"\t\tAGE:_Endianness = \"little\" ;\n",
"\tfloat LIFETIME(time, dimz_INPUT, lat, lon) ;\n",
"\t\tLIFETIME:_FillValue = NaNf ;\n",
"\t\tLIFETIME:xmin = -180.f ;\n",
"\t\tLIFETIME:xmax = 179.5f ;\n",
"\t\tLIFETIME:xstag = 0.f ;\n",
"\t\tLIFETIME:ymin = -90.f ;\n",
"\t\tLIFETIME:ymax = 90.f ;\n",
"\t\tLIFETIME:ystag = 0.f ;\n",
"\t\tLIFETIME:zmin = 1050.f ;\n",
"\t\tLIFETIME:zmax = 1050.f ;\n",
"\t\tLIFETIME:zstag = 0.f ;\n",
"\t\tLIFETIME:missing_data = -999.99f ;\n",
"\t\tLIFETIME:_Storage = \"chunked\" ;\n",
"\t\tLIFETIME:_ChunkSizes = 50, 1, 121, 160 ;\n",
"\t\tLIFETIME:_DeflateLevel = 1 ;\n",
"\t\tLIFETIME:_Shuffle = \"true\" ;\n",
"\t\tLIFETIME:_Endianness = \"little\" ;\n",
"\tfloat PMIN(time, dimz_INPUT, lat, lon) ;\n",
"\t\tPMIN:_FillValue = NaNf ;\n",
"\t\tPMIN:xmin = -180.f ;\n",
"\t\tPMIN:xmax = 179.5f ;\n",
"\t\tPMIN:xstag = 0.f ;\n",
"\t\tPMIN:ymin = -90.f ;\n",
"\t\tPMIN:ymax = 90.f ;\n",
"\t\tPMIN:ystag = 0.f ;\n",
"\t\tPMIN:zmin = 1050.f ;\n",
"\t\tPMIN:zmax = 1050.f ;\n",
"\t\tPMIN:zstag = 0.f ;\n",
"\t\tPMIN:missing_data = -999.99f ;\n",
"\t\tPMIN:_Storage = \"chunked\" ;\n",
"\t\tPMIN:_ChunkSizes = 50, 1, 121, 160 ;\n",
"\t\tPMIN:_DeflateLevel = 1 ;\n",
"\t\tPMIN:_Shuffle = \"true\" ;\n",
"\t\tPMIN:_Endianness = \"little\" ;\n",
"\n",
"// global attributes:\n",
"\t\t:CDI = \"Climate Data Interface version 1.9.6 (http://mpimet.mpg.de/cdi)\" ;\n",
"\t\t:history = \"Mon Oct 11 22:47:08 2021: cdo -f nc4c -z zip sellonlatbox,100,179.5,-60,0 TMP0 201901/C20190101_00\\nThu Apr 22 19:52:34 2021: cdo setgrid,gridfile TMP1 C20190101_00\" ;\n",
"\t\t:Conventions = \"CF-1.6\" ;\n",
"\t\t:domxmin = -180.f ;\n",
"\t\t:domxmax = 179.5f ;\n",
"\t\t:domymin = -90.f ;\n",
"\t\t:domymax = 90.f ;\n",
"\t\t:domzmin = 1050.f ;\n",
"\t\t:domzmax = 1050.f ;\n",
"\t\t:domamin = 0.f ;\n",
"\t\t:domamax = 0.f ;\n",
"\t\t:constants_file_name = \"C20190101_00_cst\" ;\n",
"\t\t:CDO = \"Climate Data Operators version 1.9.6 (http://mpimet.mpg.de/cdo)\" ;\n",
"\t\t:source_directory = \"/g/data/v45/aph502/helpdesk/02272-Chenhui/ETH/clim.mincl/cyc/201901\" ;\n",
"\t\t:_NCProperties = \"version=2,netcdf=4.7.4,hdf5=1.10.6\" ;\n",
"\t\t:_SuperblockVersion = 0 ;\n",
"\t\t:_IsNetcdf4 = 1 ;\n",
"\t\t:_Format = \"netCDF-4\" ;\n",
"}\n"
]
}
],
"source": [
"!ncdump -hs '/g/data/v45/aph502/helpdesk/02272-Chenhui/ETH/clim.mincl/cyc/201901.nc'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9c511f8-d48b-4ea6-86e6-ca4085a11b9e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:analysis3-22.01]",
"language": "python",
"name": "conda-env-analysis3-22.01-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
#!/usr/bin/env python
# coding: utf-8
import xarray as xr
from pathlib import Path
from dateutil import parser
import datetime
import sys
def open_ETH(file, origin='1960-01-01', timechunk=50):
    """Open a netCDF file from ETH, alter the time metadata based on filename and return an xarray.Dataset

    file      : path (pathlib.Path or str) to an ETH data file whose name has the form C%Y%m%d_%H
    origin    : origin date for the time coordinate, parsed with dateutil
    timechunk : chunk size requested along the time dimension when the data is later written

    Returns an xarray.Dataset with a single-element 'time' dimension holding the number of
    days between origin and the time encoded in the file name.
    Raises ValueError if the file name does not match the C%Y%m%d_%H pattern.
    """
    # Accept plain strings as well as Path objects; only Path has the .name attribute
    file = Path(file)

    # Parse the model time from the file name before touching the file, so a badly
    # named file fails without leaving a dataset open
    modeltime = datetime.datetime.strptime(file.name, "C%Y%m%d_%H")

    # Calculate the offset from the origin to the modeltime in (fractional) days
    days = (modeltime - parser.parse(origin)) / datetime.timedelta(days=1)

    # Open the data file, don't bother decoding, and immediately drop the time dimension
    ds = xr.open_dataset(file, engine='netcdf4', decode_cf=False).squeeze(dim='time', drop=True)

    # Create a new time coordinate
    time = xr.DataArray([days], dims=['time'], coords={'time': [days]})

    # Add time coordinate back into data and set time units
    ds = ds.expand_dims(time=time)
    ds.time.attrs['units'] = f"days since {origin}"

    # Set the chunking along the time dimension (first index) to timechunk
    # for all data variables
    for v in ds.data_vars:
        chunksizes = ds[v].encoding.get('chunksizes')
        if chunksizes is None:
            # No chunking recorded for this variable, so there is nothing to rescale
            continue
        ds[v].encoding['chunksizes'] = (timechunk, *chunksizes[1:])

    # Return dataset
    return ds
def concat_ETH(dir, **open_kwargs):
    """
    Open all ETH data files in a specified directory and concatenate along the time axis

    dir         : directory (str or pathlib.Path); every entry in it is opened with open_ETH
    open_kwargs : optional keyword arguments (e.g. origin, timechunk) passed unchanged to open_ETH

    Returns the concatenated xarray.Dataset, with the directory recorded in the
    'source_directory' global attribute.
    Raises ValueError if the directory contains no entries.
    """
    directory = Path(dir)

    # Sorted by name, which is chronological for zero-padded CYYYYMMDD_HH file names
    files = sorted(directory.glob('*'))
    if not files:
        raise ValueError(f"No files found in {directory} to concatenate")

    # Use a list comprehension to open all the files in an array, and use that as input
    # to the xarray.concat function
    ds = xr.concat([open_ETH(f, **open_kwargs) for f in files], dim='time')

    # Save source directory as attribute to be used later. Converting through Path drops
    # any trailing slash, so appending a suffix to this attribute names a sibling file
    # rather than a hidden file inside the directory.
    ds.attrs['source_directory'] = str(directory)
    return ds
def save_ETH(ds, outpath=None):
    """
    Save concatenated dataset to a netCDF file. This is just a wrapper around the to_netcdf
    method, but does some manipulation of the encoding information so that it is saved with
    the proper chunking and infers an output path if one is not provided

    ds      : dataset to write; must contain a 'time' variable and, when outpath is not
              given, a 'source_directory' global attribute
    outpath : output file path; defaults to the source directory name with a `.nc` suffix

    Returns the path the dataset was written to. The dataset's own encoding dictionaries
    are left unmodified.
    """
    # Create an encoding dictionary by copying the .encoding attributes from all the
    # variables. Dataset-level encoding is deliberately left out: to_netcdf expects a
    # mapping keyed by variable name only.
    enc = {name: dict(var.encoding) for name, var in ds.variables.items()}

    # Make sure the new time coordinate is not chunked
    enc['time']['contiguous'] = True

    if outpath is None:
        # Set output path to be the name of the concatenated directory with `.nc` suffix.
        # Going through Path removes a trailing slash that would otherwise give `<dir>/.nc`
        outpath = str(Path(ds.attrs['source_directory'])) + '.nc'

    # An explicit empty list overrides any 'unlimited_dims' carried in ds.encoding, so no
    # dimension is written as unlimited
    ds.to_netcdf(outpath, encoding=enc, unlimited_dims=[])
    return outpath
def main(directories):
    """
    Main routine that loops through the directories passed as command line arguments

    directories : iterable of directory paths; each one is concatenated with concat_ETH
                  and written with save_ETH using its default output path

    Prints one line per directory naming the file that was written. Returns None.
    """
    for d in directories:
        ds = concat_ETH(d)
        saved_file = save_ETH(ds)
        print(f"Concatenated {d} to {saved_file}")
if __name__ == '__main__':
    # Everything after the program name is treated as a directory to process;
    # main's return value becomes the process exit status
    cli_directories = sys.argv[1:]
    sys.exit(main(cli_directories))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment