Skip to content

Instantly share code, notes, and snippets.

@spencerkclark
Created March 31, 2019 01:19
Show Gist options
  • Save spencerkclark/a2b47c6acc5a320659973a561b951c4c to your computer and use it in GitHub Desktop.
Save spencerkclark/a2b47c6acc5a320659973a561b951c4c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "aospy issue 320\n-------------------"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import xarray as xr\n\nfrom aospy.internal_names import TIME_STR, TIME_BOUNDS_STR, BOUNDS_STR, TIME_WEIGHTS_STR",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def ensure_time_as_index_failing_order(ds):\n for name in ['a', 'time_bounds', 'time_weights']:\n if TIME_STR not in ds[name].indexes:\n da = ds[name].expand_dims(TIME_STR)\n ds[TIME_STR] = ds[TIME_STR]\n ds[name] = da\n return ds",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def ensure_time_as_index_passing_order(ds):\n for name in ['time_bounds', 'a', 'time_weights']:\n if TIME_STR not in ds[name].indexes:\n da = ds[name].expand_dims(TIME_STR)\n ds[TIME_STR] = ds[TIME_STR]\n ds[name] = da\n return ds",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def construct_data():\n arr = xr.DataArray([-93], dims=[TIME_STR], coords={TIME_STR: [3]})\n arr[TIME_STR].attrs['units'] = 'days since 2000-01-01 00:00:00'\n arr[TIME_STR].attrs['calendar'] = 'standard'\n ds = arr.to_dataset(name='a')\n ds.coords[TIME_WEIGHTS_STR] = xr.DataArray(\n [1], dims=[TIME_STR], coords={TIME_STR: arr[TIME_STR]}\n )\n ds.coords[TIME_BOUNDS_STR] = xr.DataArray(\n [[3.5, 4.5]], dims=[TIME_STR, BOUNDS_STR],\n coords={TIME_STR: arr[TIME_STR]}\n )\n ds = ds.isel(**{TIME_STR: 0}).expand_dims(TIME_STR)\n return ds",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "arr = xr.DataArray([-93], dims=[TIME_STR], coords={TIME_STR: [3]})\narr.indexes",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": "time: Int64Index([3], dtype='int64', name='time')"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "arr.isel(time=0).expand_dims('time').indexes",
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 6,
"data": {
"text/plain": "time: Int64Index([3], dtype='int64', name='time')"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ds = construct_data()",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ds",
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 8,
"data": {
"text/plain": "<xarray.Dataset>\nDimensions: (bounds: 2, time: 1)\nCoordinates:\n * time (time) int64 3\n time_weights int64 1\n time_bounds (bounds) float64 3.5 4.5\nDimensions without coordinates: bounds\nData variables:\n a (time) int64 -93"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Despite have a dimension and a coordinate, 'a' does not have any indexes\n# initially in xarray 0.12 (this is different than in 0.11.3). This may be a\n# regression in xarray. I'll see if I can find a more minimal example.\nds['a'].indexes",
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 9,
"data": {
"text/plain": ""
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "actual = ensure_time_as_index_failing_order(ds)",
"execution_count": 10,
"outputs": [
{
"output_type": "error",
"ename": "ValueError",
"evalue": "Dimension time already exists.",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-0a0a8548d13e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mactual\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mensure_time_as_index_failing_order\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-2-688509dd8c9a>\u001b[0m in \u001b[0;36mensure_time_as_index_failing_order\u001b[0;34m(ds)\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'time_bounds'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'time_weights'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mTIME_STR\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindexes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mda\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpand_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mTIME_STR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTIME_STR\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTIME_STR\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mda\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m//anaconda/envs/aospy_dev/lib/python3.6/site-packages/xarray/core/dataarray.py\u001b[0m in \u001b[0;36mexpand_dims\u001b[0;34m(self, dim, axis)\u001b[0m\n\u001b[1;32m 1163\u001b[0m \u001b[0mThis\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbut\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0man\u001b[0m \u001b[0madditional\u001b[0m \u001b[0mdimension\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1164\u001b[0m \"\"\"\n\u001b[0;32m-> 1165\u001b[0;31m \u001b[0mds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_to_temp_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpand_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdim\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1166\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_from_temp_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m//anaconda/envs/aospy_dev/lib/python3.6/site-packages/xarray/core/dataset.py\u001b[0m in \u001b[0;36mexpand_dims\u001b[0;34m(self, dim, axis)\u001b[0m\n\u001b[1;32m 2370\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0md\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdims\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2371\u001b[0m raise ValueError(\n\u001b[0;32m-> 2372\u001b[0;31m 'Dimension {dim} already exists.'.format(dim=d))\n\u001b[0m\u001b[1;32m 2373\u001b[0m if (d in self._variables and\n\u001b[1;32m 2374\u001b[0m not utils.is_scalar(self._variables[d])):\n",
"\u001b[0;31mValueError\u001b[0m: Dimension time already exists."
]
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# If 'a' comes after one of the coordinates things work\nds = construct_data()\nactual = ensure_time_as_index_passing_order(ds)",
"execution_count": 11,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "actual",
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 12,
"data": {
"text/plain": "<xarray.Dataset>\nDimensions: (bounds: 2, time: 1)\nCoordinates:\n * time (time) int64 3\n time_weights (time) int64 1\n time_bounds (time, bounds) float64 3.5 4.5\nDimensions without coordinates: bounds\nData variables:\n a (time) int64 -93"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# We have mutated ds in the process, however. \nds",
"execution_count": 13,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 13,
"data": {
"text/plain": "<xarray.Dataset>\nDimensions: (bounds: 2, time: 1)\nCoordinates:\n * time (time) int64 3\n time_weights (time) int64 1\n time_bounds (time, bounds) float64 3.5 4.5\nDimensions without coordinates: bounds\nData variables:\n a (time) int64 -93"
},
"metadata": {}
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "xarray regression?\n----------------------\n\nHere is a minimal working example."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "da = xr.DataArray([1], [('x', [0])], name='a')\nda.indexes",
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 14,
"data": {
"text/plain": "x: Int64Index([0], dtype='int64', name='x')"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# This behaves appropriately\nda.isel(x=0).expand_dims('x').indexes",
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 15,
"data": {
"text/plain": "x: Int64Index([0], dtype='int64', name='x')"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# However this does not in xarray 0.12.0\nda.to_dataset().isel(x=0).expand_dims('x').a.indexes",
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 16,
"data": {
"text/plain": ""
},
"metadata": {}
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Adapted logic\n----------------"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def ensure_time_as_index_non_mutating(ds):\n time_indexed_coords = {TIME_WEIGHTS_STR, TIME_BOUNDS_STR}\n time_indexed_vars = set(ds.data_vars).union(time_indexed_coords)\n time_indexed_vars = time_indexed_vars.intersection(ds.variables)\n variables_to_replace = {}\n for name in ['a', 'time_bounds', 'time_weights']:\n if TIME_STR not in ds[name].indexes:\n # Note 'a' *should* have a time index; however due to the\n # xarray regression, it does not, so we end up operating on it here, and need\n # to adapt our logic accordingly.\n da = ds[name]\n \n # Don't expand_dims if a time dimension already exists (as in the case of 'a')\n if TIME_STR not in da.dims:\n da = ds[name].expand_dims(TIME_STR)\n da = da.assign_coords(**{TIME_STR: ds[TIME_STR]})\n variables_to_replace[name] = da\n return ds.assign(**variables_to_replace)",
"execution_count": 17,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ds = construct_data()",
"execution_count": 18,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "actual = ensure_time_as_index_non_mutating(ds)",
"execution_count": 19,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# ds has not been changed\nds",
"execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 20,
"data": {
"text/plain": "<xarray.Dataset>\nDimensions: (bounds: 2, time: 1)\nCoordinates:\n * time (time) int64 3\n time_weights int64 1\n time_bounds (bounds) float64 3.5 4.5\nDimensions without coordinates: bounds\nData variables:\n a (time) int64 -93"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.7",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment