Created
March 31, 2019 01:19
-
-
Save spencerkclark/a2b47c6acc5a320659973a561b951c4c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "aospy issue 320\n-------------------" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import xarray as xr\n\nfrom aospy.internal_names import TIME_STR, TIME_BOUNDS_STR, BOUNDS_STR, TIME_WEIGHTS_STR", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def ensure_time_as_index_failing_order(ds):\n for name in ['a', 'time_bounds', 'time_weights']:\n if TIME_STR not in ds[name].indexes:\n da = ds[name].expand_dims(TIME_STR)\n ds[TIME_STR] = ds[TIME_STR]\n ds[name] = da\n return ds", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def ensure_time_as_index_passing_order(ds):\n for name in ['time_bounds', 'a', 'time_weights']:\n if TIME_STR not in ds[name].indexes:\n da = ds[name].expand_dims(TIME_STR)\n ds[TIME_STR] = ds[TIME_STR]\n ds[name] = da\n return ds", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def construct_data():\n arr = xr.DataArray([-93], dims=[TIME_STR], coords={TIME_STR: [3]})\n arr[TIME_STR].attrs['units'] = 'days since 2000-01-01 00:00:00'\n arr[TIME_STR].attrs['calendar'] = 'standard'\n ds = arr.to_dataset(name='a')\n ds.coords[TIME_WEIGHTS_STR] = xr.DataArray(\n [1], dims=[TIME_STR], coords={TIME_STR: arr[TIME_STR]}\n )\n ds.coords[TIME_BOUNDS_STR] = xr.DataArray(\n [[3.5, 4.5]], dims=[TIME_STR, BOUNDS_STR],\n coords={TIME_STR: arr[TIME_STR]}\n )\n ds = ds.isel(**{TIME_STR: 0}).expand_dims(TIME_STR)\n return ds", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "arr = xr.DataArray([-93], dims=[TIME_STR], coords={TIME_STR: [3]})\narr.indexes", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": "time: Int64Index([3], dtype='int64', name='time')" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "arr.isel(time=0).expand_dims('time').indexes", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": "time: Int64Index([3], dtype='int64', name='time')" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ds = construct_data()", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ds", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 8, | |
"data": { | |
"text/plain": "<xarray.Dataset>\nDimensions: (bounds: 2, time: 1)\nCoordinates:\n * time (time) int64 3\n time_weights int64 1\n time_bounds (bounds) float64 3.5 4.5\nDimensions without coordinates: bounds\nData variables:\n a (time) int64 -93" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Despite have a dimension and a coordinate, 'a' does not have any indexes\n# initially in xarray 0.12 (this is different than in 0.11.3). This may be a\n# regression in xarray. I'll see if I can find a more minimal example.\nds['a'].indexes", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 9, | |
"data": { | |
"text/plain": "" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "actual = ensure_time_as_index_failing_order(ds)", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "error", | |
"ename": "ValueError", | |
"evalue": "Dimension time already exists.", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-10-0a0a8548d13e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mactual\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mensure_time_as_index_failing_order\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m<ipython-input-2-688509dd8c9a>\u001b[0m in \u001b[0;36mensure_time_as_index_failing_order\u001b[0;34m(ds)\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'time_bounds'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'time_weights'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mTIME_STR\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindexes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mda\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpand_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mTIME_STR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTIME_STR\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTIME_STR\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mda\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m//anaconda/envs/aospy_dev/lib/python3.6/site-packages/xarray/core/dataarray.py\u001b[0m in \u001b[0;36mexpand_dims\u001b[0;34m(self, dim, axis)\u001b[0m\n\u001b[1;32m 1163\u001b[0m \u001b[0mThis\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbut\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0man\u001b[0m \u001b[0madditional\u001b[0m \u001b[0mdimension\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1164\u001b[0m \"\"\"\n\u001b[0;32m-> 1165\u001b[0;31m \u001b[0mds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_to_temp_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpand_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdim\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1166\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_from_temp_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m//anaconda/envs/aospy_dev/lib/python3.6/site-packages/xarray/core/dataset.py\u001b[0m in \u001b[0;36mexpand_dims\u001b[0;34m(self, dim, axis)\u001b[0m\n\u001b[1;32m 2370\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0md\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdims\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2371\u001b[0m raise ValueError(\n\u001b[0;32m-> 2372\u001b[0;31m 'Dimension {dim} already exists.'.format(dim=d))\n\u001b[0m\u001b[1;32m 2373\u001b[0m if (d in self._variables and\n\u001b[1;32m 2374\u001b[0m not utils.is_scalar(self._variables[d])):\n", | |
"\u001b[0;31mValueError\u001b[0m: Dimension time already exists." | |
] | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# If 'a' comes after one of the coordinates things work\nds = construct_data()\nactual = ensure_time_as_index_passing_order(ds)", | |
"execution_count": 11, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "actual", | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 12, | |
"data": { | |
"text/plain": "<xarray.Dataset>\nDimensions: (bounds: 2, time: 1)\nCoordinates:\n * time (time) int64 3\n time_weights (time) int64 1\n time_bounds (time, bounds) float64 3.5 4.5\nDimensions without coordinates: bounds\nData variables:\n a (time) int64 -93" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# We have mutated ds in the process, however. \nds", | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 13, | |
"data": { | |
"text/plain": "<xarray.Dataset>\nDimensions: (bounds: 2, time: 1)\nCoordinates:\n * time (time) int64 3\n time_weights (time) int64 1\n time_bounds (time, bounds) float64 3.5 4.5\nDimensions without coordinates: bounds\nData variables:\n a (time) int64 -93" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "xarray regression?\n----------------------\n\nHere is a minimal working example." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "da = xr.DataArray([1], [('x', [0])], name='a')\nda.indexes", | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 14, | |
"data": { | |
"text/plain": "x: Int64Index([0], dtype='int64', name='x')" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# This behaves appropriately\nda.isel(x=0).expand_dims('x').indexes", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 15, | |
"data": { | |
"text/plain": "x: Int64Index([0], dtype='int64', name='x')" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# However this does not in xarray 0.12.0\nda.to_dataset().isel(x=0).expand_dims('x').a.indexes", | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 16, | |
"data": { | |
"text/plain": "" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Adapted logic\n----------------" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def ensure_time_as_index_non_mutating(ds):\n time_indexed_coords = {TIME_WEIGHTS_STR, TIME_BOUNDS_STR}\n time_indexed_vars = set(ds.data_vars).union(time_indexed_coords)\n time_indexed_vars = time_indexed_vars.intersection(ds.variables)\n variables_to_replace = {}\n for name in ['a', 'time_bounds', 'time_weights']:\n if TIME_STR not in ds[name].indexes:\n # Note 'a' *should* have a time index; however due to the\n # xarray regression, it does not, so we end up operating on it here, and need\n # to adapt our logic accordingly.\n da = ds[name]\n \n # Don't expand_dims if a time dimension already exists (as in the case of 'a')\n if TIME_STR not in da.dims:\n da = ds[name].expand_dims(TIME_STR)\n da = da.assign_coords(**{TIME_STR: ds[TIME_STR]})\n variables_to_replace[name] = da\n return ds.assign(**variables_to_replace)", | |
"execution_count": 17, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ds = construct_data()", | |
"execution_count": 18, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "actual = ensure_time_as_index_non_mutating(ds)", | |
"execution_count": 19, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# ds has not been changed\nds", | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 20, | |
"data": { | |
"text/plain": "<xarray.Dataset>\nDimensions: (bounds: 2, time: 1)\nCoordinates:\n * time (time) int64 3\n time_weights int64 1\n time_bounds (bounds) float64 3.5 4.5\nDimensions without coordinates: bounds\nData variables:\n a (time) int64 -93" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.6.7", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment