Created
February 21, 2018 03:36
-
-
Save JiaweiZhuang/89a6bce7d5adc473134c7014bec065a0 to your computer and use it in GitHub Desktop.
xarray.apply_ufunc produces inconsistent coordinate values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import xarray as xr" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Preparation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# function to apply\n", | |
"A = np.random.rand(6, 4)\n", | |
"apply_A = lambda data: A.dot(data.T).T" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(2, 4)" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# input data\n", | |
"data = np.random.rand(2, 4)\n", | |
"data.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(2, 6)" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# change the core dimension from 4 to 6\n", | |
"apply_A(data).shape " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.DataArray 'data' (time: 2, x: 4)>\n", | |
"array([[ 0.840646, 0.330686, 0.669603, 0.840243],\n", | |
" [ 0.419193, 0.936886, 0.189482, 0.543247]])\n", | |
"Coordinates:\n", | |
" * time (time) int64 0 1\n", | |
" * x (x) int64 0 1 2 3" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# input DataArray\n", | |
"dr = xr.DataArray(data, \n", | |
" dims=['time', 'x'],\n", | |
" coords=[np.arange(2), np.arange(4)],\n", | |
" name='data'\n", | |
" )\n", | |
"dr" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Case 1: not specifying input/output dimension" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.DataArray 'data' (time: 2, x: 6)>\n", | |
"array([[ 1.24765 , 0.24795 , 1.053913, 1.338334, 1.9124 , 0.860453],\n", | |
" [ 0.923691, 0.432066, 0.751094, 1.274393, 1.106883, 0.899529]])\n", | |
"Coordinates:\n", | |
" * time (time) int64 0 1\n", | |
" * x (x) int64 0 1 2 3" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# note that the coordinate 'x' has incorrect dimension (should be 6, not 4)\n", | |
"dr_out = xr.apply_ufunc(apply_A, dr)\n", | |
"dr_out" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "conflicting sizes for dimension 'x': length 6 on 'data' and length 4 on 'x'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-7-4aafe6266f8a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# inconsistent coordinate size is not allowed in a NetCDF file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdr_out\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_netcdf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'apply_ufunc_result.nc'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataarray.py\u001b[0m in \u001b[0;36mto_netcdf\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1422\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[0;31m# No problems with the name - so we're fine!\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1424\u001b[0;31m \u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1425\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1426\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_netcdf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataarray.py\u001b[0m in \u001b[0;36mto_dataset\u001b[0;34m(self, dim, name)\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_to_dataset_split\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 360\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 361\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_to_dataset_whole\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 362\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 363\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataarray.py\u001b[0m in \u001b[0;36m_to_dataset_whole\u001b[0;34m(self, name, shallow_copy)\u001b[0m\n\u001b[1;32m 326\u001b[0m \u001b[0mvariables\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvariables\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdeep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0mcoord_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_coords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 328\u001b[0;31m \u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_from_vars_and_coord_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcoord_names\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 329\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataset.py\u001b[0m in \u001b[0;36m_from_vars_and_coord_names\u001b[0;34m(cls, variables, coord_names, attrs)\u001b[0m\n\u001b[1;32m 651\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mclassmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 652\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_from_vars_and_coord_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvariables\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcoord_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 653\u001b[0;31m \u001b[0mdims\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcalculate_dimensions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 654\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_construct_direct\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcoord_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdims\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataset.py\u001b[0m in \u001b[0;36mcalculate_dimensions\u001b[0;34m(variables)\u001b[0m\n\u001b[1;32m 111\u001b[0m raise ValueError('conflicting sizes for dimension %r: '\n\u001b[1;32m 112\u001b[0m \u001b[0;34m'length %s on %r and length %s on %r'\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 113\u001b[0;31m (dim, size, k, dims[dim], last_used[dim]))\n\u001b[0m\u001b[1;32m 114\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdims\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: conflicting sizes for dimension 'x': length 6 on 'data' and length 4 on 'x'" | |
] | |
} | |
], | |
"source": [ | |
"# inconsistent coordinate size is not allowed in a NetCDF file\n", | |
"dr_out.to_netcdf('apply_ufunc_result.nc')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This I/O issue can be fixed by correcting the coordinate variable:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"dr_out['x'] = np.arange(6)\n", | |
"dr_out.to_netcdf('apply_ufunc_result.nc') # no error now" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Case 2: Specifying a new output dimension name" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Alternatively, we can give `output_core_dims` a different name. The new dimension `x_new` will have no coordinate value by default. This is still a legal NetCDF file." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dr_out2 = xr.apply_ufunc(apply_A, dr, \n", | |
" input_core_dims=[['x']],\n", | |
" output_core_dims=[['x_new']])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dr_out2.to_netcdf('apply_ufunc_result.nc') # missing coordinate value is fine" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Case 3: Specifying the same output dimension name" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"If `output_core_dims` is the same as `input_core_dims`, the old, incorrect coordinate (`x` here) will be kept." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.DataArray 'data' (time: 2, x: 6)>\n", | |
"array([[ 1.24765 , 0.24795 , 1.053913, 1.338334, 1.9124 , 0.860453],\n", | |
" [ 0.923691, 0.432066, 0.751094, 1.274393, 1.106883, 0.899529]])\n", | |
"Coordinates:\n", | |
" * time (time) int64 0 1\n", | |
" * x (x) int64 0 1 2 3" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"xr.apply_ufunc(apply_A, dr, \n", | |
" input_core_dims=[['x']],\n", | |
" output_core_dims=[['x']],)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`xr.apply_ufunc` should probably drop the core dimension coordinate (`x` here) by default. This is safer (will not produce illegal NetCDF file) and also makes more physical sense (The core coordinate is likely to change)." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Case 4: Specifying output coordinate size" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`output_sizes` has no effect on either variable size or coordinate size, as long as the input is not a dask array..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.DataArray 'data' (time: 2, x: 6)>\n", | |
"array([[ 1.24765 , 0.24795 , 1.053913, 1.338334, 1.9124 , 0.860453],\n", | |
" [ 0.923691, 0.432066, 0.751094, 1.274393, 1.106883, 0.899529]])\n", | |
"Coordinates:\n", | |
" * time (time) int64 0 1\n", | |
" * x (x) int64 0 1 2 3" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"xr.apply_ufunc(apply_A, dr, \n", | |
" input_core_dims=[['x']],\n", | |
" output_core_dims=[['x']],\n", | |
" output_sizes = {'x': 999})" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": "block", | |
"toc_window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment