Skip to content

Instantly share code, notes, and snippets.

@JiaweiZhuang
Created February 21, 2018 03:36
Show Gist options
  • Save JiaweiZhuang/89a6bce7d5adc473134c7014bec065a0 to your computer and use it in GitHub Desktop.
Save JiaweiZhuang/89a6bce7d5adc473134c7014bec065a0 to your computer and use it in GitHub Desktop.
xarray.apply_ufunc produces inconsistent coordinate values
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import xarray as xr"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Preparation"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# function to apply\n",
"A = np.random.rand(6, 4)\n",
"apply_A = lambda data: A.dot(data.T).T"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2, 4)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# input data\n",
"data = np.random.rand(2, 4)\n",
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(2, 6)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# change the core dimension from 4 to 6\n",
"apply_A(data).shape "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.DataArray 'data' (time: 2, x: 4)>\n",
"array([[ 0.840646, 0.330686, 0.669603, 0.840243],\n",
" [ 0.419193, 0.936886, 0.189482, 0.543247]])\n",
"Coordinates:\n",
" * time (time) int64 0 1\n",
" * x (x) int64 0 1 2 3"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# input DataArray\n",
"dr = xr.DataArray(data, \n",
" dims=['time', 'x'],\n",
" coords=[np.arange(2), np.arange(4)],\n",
" name='data'\n",
" )\n",
"dr"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Case 1: not specifying input/output dimension"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.DataArray 'data' (time: 2, x: 6)>\n",
"array([[ 1.24765 , 0.24795 , 1.053913, 1.338334, 1.9124 , 0.860453],\n",
" [ 0.923691, 0.432066, 0.751094, 1.274393, 1.106883, 0.899529]])\n",
"Coordinates:\n",
" * time (time) int64 0 1\n",
" * x (x) int64 0 1 2 3"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# note that the coordinate 'x' has incorrect dimension (should be 6, not 4)\n",
"dr_out = xr.apply_ufunc(apply_A, dr)\n",
"dr_out"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "conflicting sizes for dimension 'x': length 6 on 'data' and length 4 on 'x'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-7-4aafe6266f8a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# inconsistent coordinate size is not allowed in a NetCDF file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdr_out\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_netcdf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'apply_ufunc_result.nc'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataarray.py\u001b[0m in \u001b[0;36mto_netcdf\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1422\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[0;31m# No problems with the name - so we're fine!\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1424\u001b[0;31m \u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1425\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1426\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_netcdf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataarray.py\u001b[0m in \u001b[0;36mto_dataset\u001b[0;34m(self, dim, name)\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_to_dataset_split\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 360\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 361\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_to_dataset_whole\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 362\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 363\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataarray.py\u001b[0m in \u001b[0;36m_to_dataset_whole\u001b[0;34m(self, name, shallow_copy)\u001b[0m\n\u001b[1;32m 326\u001b[0m \u001b[0mvariables\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvariables\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdeep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0mcoord_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_coords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 328\u001b[0;31m \u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_from_vars_and_coord_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcoord_names\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 329\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataset.py\u001b[0m in \u001b[0;36m_from_vars_and_coord_names\u001b[0;34m(cls, variables, coord_names, attrs)\u001b[0m\n\u001b[1;32m 651\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mclassmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 652\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_from_vars_and_coord_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvariables\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcoord_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 653\u001b[0;31m \u001b[0mdims\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcalculate_dimensions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 654\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_construct_direct\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcoord_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdims\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Research/Computing/miniconda3/envs/geo/lib/python3.6/site-packages/xarray/core/dataset.py\u001b[0m in \u001b[0;36mcalculate_dimensions\u001b[0;34m(variables)\u001b[0m\n\u001b[1;32m 111\u001b[0m raise ValueError('conflicting sizes for dimension %r: '\n\u001b[1;32m 112\u001b[0m \u001b[0;34m'length %s on %r and length %s on %r'\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 113\u001b[0;31m (dim, size, k, dims[dim], last_used[dim]))\n\u001b[0m\u001b[1;32m 114\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdims\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: conflicting sizes for dimension 'x': length 6 on 'data' and length 4 on 'x'"
]
}
],
"source": [
"# inconsistent coordinate size is not allowed in a NetCDF file\n",
"dr_out.to_netcdf('apply_ufunc_result.nc')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This I/O issue can be fixed by correcting the coordinate variable:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"dr_out['x'] = np.arange(6)\n",
"dr_out.to_netcdf('apply_ufunc_result.nc') # no error now"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Case 2: Specifying a new output dimension name"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, we can give `output_core_dims` a different name. The new dimension `x_new` will have no coordinate value by default. This is still a legal NetCDF file."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"dr_out2 = xr.apply_ufunc(apply_A, dr, \n",
" input_core_dims=[['x']],\n",
" output_core_dims=[['x_new']])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"dr_out2.to_netcdf('apply_ufunc_result.nc') # missing coordinate value is fine"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Case 3: Specifying the same output dimension name"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If `output_core_dims` is the same as `input_core_dims`, the old, incorrect coordinate (`x` here) will be kept."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.DataArray 'data' (time: 2, x: 6)>\n",
"array([[ 1.24765 , 0.24795 , 1.053913, 1.338334, 1.9124 , 0.860453],\n",
" [ 0.923691, 0.432066, 0.751094, 1.274393, 1.106883, 0.899529]])\n",
"Coordinates:\n",
" * time (time) int64 0 1\n",
" * x (x) int64 0 1 2 3"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xr.apply_ufunc(apply_A, dr, \n",
" input_core_dims=[['x']],\n",
" output_core_dims=[['x']],)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`xr.apply_ufunc` should probably drop the core dimension coordinate (`x` here) by default. This is safer (will not produce illegal NetCDF file) and also makes more physical sense (The core coordinate is likely to change)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Case 4: Specifying output coordinate size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`output_sizes` has no effect on either variable size or coordinate size, as long as the input is not a dask array..."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.DataArray 'data' (time: 2, x: 6)>\n",
"array([[ 1.24765 , 0.24795 , 1.053913, 1.338334, 1.9124 , 0.860453],\n",
" [ 0.923691, 0.432066, 0.751094, 1.274393, 1.106883, 0.899529]])\n",
"Coordinates:\n",
" * time (time) int64 0 1\n",
" * x (x) int64 0 1 2 3"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xr.apply_ufunc(apply_A, dr, \n",
" input_core_dims=[['x']],\n",
" output_core_dims=[['x']],\n",
" output_sizes = {'x': 999})"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"toc_cell": false,
"toc_position": {},
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment