Created
July 23, 2016 00:27
-
-
Save crusaderky/002ba64ee270164931d32ea3366dce1f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import xarray\n", | |
"import pandas\n", | |
"import numpy\n", | |
"from collections import OrderedDict\n", | |
"\n", | |
"def proper_unstack(array, dim):\n", | |
"    \"\"\"Unstack ``dim`` keeping labels in order of first appearance.\n", | |
"\n", | |
"    Work around an issue in xarray that causes the data to be sorted\n", | |
"    alphabetically by label on unstack():\n", | |
"\n", | |
"    https://github.com/pydata/xarray/issues/906\n", | |
"\n", | |
"    :param array:\n", | |
"        xarray.DataArray whose dimension ``dim`` is indexed by a\n", | |
"        pandas.MultiIndex\n", | |
"    :param str dim:\n", | |
"        name of the stacked dimension to unstack\n", | |
"    :returns:\n", | |
"        a new DataArray with ``dim`` unstacked; the input array is\n", | |
"        copied, not modified. Coordinates of object dtype are\n", | |
"        converted to strings.\n", | |
"    \"\"\"\n", | |
"    # Regenerate Pandas multi-index to be ordered by first appearance\n", | |
"    mindex = array.coords[dim].to_pandas().index\n", | |
"\n", | |
"    # pandas 0.24 renamed MultiIndex.labels to MultiIndex.codes and\n", | |
"    # pandas 1.0 removed the old attribute; support both spellings.\n", | |
"    if hasattr(mindex, 'codes'):\n", | |
"        old_codes = mindex.codes\n", | |
"    else:\n", | |
"        old_codes = mindex.labels\n", | |
"\n", | |
"    levels = []\n", | |
"    codes = []\n", | |
"    for levels_i, codes_i in zip(mindex.levels, old_codes):\n", | |
"        # Maps old code -> new code, in order of first appearance\n", | |
"        code_map = OrderedDict()\n", | |
"\n", | |
"        for code in codes_i:\n", | |
"            # -1 marks a missing value; it is not a position in\n", | |
"            # levels_i and must not be remapped.\n", | |
"            if code != -1 and code not in code_map:\n", | |
"                code_map[code] = len(code_map)\n", | |
"\n", | |
"        levels.append([levels_i[k] for k in code_map])\n", | |
"        codes.append([code_map.get(k, -1) for k in codes_i])\n", | |
"\n", | |
"    mindex = pandas.MultiIndex(levels, codes, names=mindex.names)\n", | |
"    array = array.copy()\n", | |
"    array.coords[dim] = mindex\n", | |
"\n", | |
"    # Invoke builtin unstack\n", | |
"    array = array.unstack(dim)\n", | |
"\n", | |
"    # Convert object coords to string. A separate loop variable is used\n", | |
"    # so that the ``dim`` argument is not shadowed.\n", | |
"    for name in array.dims:\n", | |
"        if array.coords[name].dtype == numpy.dtype('O'):\n", | |
"            array.coords[name] = array.coords[name].values.astype(str)\n", | |
"\n", | |
"    return array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def demo_unstack(index):\n", | |
"    \"\"\"Print a sample stacked array followed by the result of the\n", | |
"    builtin unstack() and of proper_unstack() on it.\n", | |
"\n", | |
"    :param index:\n", | |
"        sequence of (x, count) label pairs used to build the MultiIndex\n", | |
"    \"\"\"\n", | |
"    index = pandas.MultiIndex.from_tuples(index, names=['x', 'count'])\n", | |
"    values = list(range(len(index)))\n", | |
"    stacked = xarray.DataArray(pandas.Series(values, index))\n", | |
"\n", | |
"    # Each view is built lazily, right before it is printed\n", | |
"    views = [\n", | |
"        (\"STACKED:\", lambda: stacked),\n", | |
"        (\"\\n\\nBUILTIN:\", lambda: stacked.unstack('dim_0')),\n", | |
"        (\"\\n\\nPROPER:\", lambda: proper_unstack(stacked, 'dim_0')),\n", | |
"    ]\n", | |
"    for heading, build in views:\n", | |
"        print(heading)\n", | |
"        print(build())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"STACKED:\n", | |
"<xarray.DataArray (dim_0: 8)>\n", | |
"array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)\n", | |
"Coordinates:\n", | |
" * dim_0 (dim_0) object ('x1', 'first') ('x1', 'second') ('x1', 'third') ...\n", | |
"\n", | |
"\n", | |
"BUILTIN:\n", | |
"<xarray.DataArray (x: 2, count: 4)>\n", | |
"array([[4, 7, 5, 6],\n", | |
" [0, 3, 1, 2]], dtype=int64)\n", | |
"Coordinates:\n", | |
" * x (x) object 'x0' 'x1'\n", | |
" * count (count) object 'first' 'fourth' 'second' 'third'\n", | |
"\n", | |
"\n", | |
"PROPER:\n", | |
"<xarray.DataArray (x: 2, count: 4)>\n", | |
"array([[0, 1, 2, 3],\n", | |
" [4, 5, 6, 7]], dtype=int64)\n", | |
"Coordinates:\n", | |
" * x (x) <U2 'x1' 'x0'\n", | |
" * count (count) <U6 'first' 'second' 'third' 'fourth'\n" | |
] | |
} | |
], | |
"source": [ | |
"# Every x has all four counts, but 'x1' is listed before 'x0', so the\n", | |
"# order of appearance differs from alphabetical order on both levels.\n", | |
"demo_unstack([\n", | |
"    ('x1', 'first'),\n", | |
"    ('x1', 'second'),\n", | |
"    ('x1', 'third'),\n", | |
"    ('x1', 'fourth'),\n", | |
"    ('x0', 'first'),\n", | |
"    ('x0', 'second'),\n", | |
"    ('x0', 'third'),\n", | |
"    ('x0', 'fourth'),\n", | |
"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"STACKED:\n", | |
"<xarray.DataArray (dim_0: 8)>\n", | |
"array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)\n", | |
"Coordinates:\n", | |
" * dim_0 (dim_0) object ('x0', 'first') ('x0', 'second') ('x0', 'third') ...\n", | |
"\n", | |
"\n", | |
"BUILTIN:\n", | |
"<xarray.DataArray (x: 2, count: 4)>\n", | |
"array([[0, 3, 1, 2],\n", | |
" [7, 4, 6, 5]], dtype=int64)\n", | |
"Coordinates:\n", | |
" * x (x) object 'x0' 'x1'\n", | |
" * count (count) object 'first' 'fourth' 'second' 'third'\n", | |
"\n", | |
"\n", | |
"PROPER:\n", | |
"<xarray.DataArray (x: 2, count: 4)>\n", | |
"array([[0, 1, 2, 3],\n", | |
" [7, 6, 5, 4]], dtype=int64)\n", | |
"Coordinates:\n", | |
" * x (x) <U2 'x0' 'x1'\n", | |
" * count (count) <U6 'first' 'second' 'third' 'fourth'\n" | |
] | |
} | |
], | |
"source": [ | |
"# The x labels are already alphabetical; the counts of 'x1' are listed\n", | |
"# in the reverse of the order in which 'x0' first introduced them.\n", | |
"demo_unstack([\n", | |
"    ('x0', 'first'),\n", | |
"    ('x0', 'second'),\n", | |
"    ('x0', 'third'),\n", | |
"    ('x0', 'fourth'),\n", | |
"    ('x1', 'fourth'),\n", | |
"    ('x1', 'third'),\n", | |
"    ('x1', 'second'),\n", | |
"    ('x1', 'first'),\n", | |
"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"STACKED:\n", | |
"<xarray.DataArray (dim_0: 5)>\n", | |
"array([0, 1, 2, 3, 4], dtype=int64)\n", | |
"Coordinates:\n", | |
" * dim_0 (dim_0) object ('x1', 'first') ('x1', 'second') ('x1', 'third') ...\n", | |
"\n", | |
"\n", | |
"BUILTIN:\n", | |
"<xarray.DataArray (x: 2, count: 4)>\n", | |
"array([[ nan, 4., nan, nan],\n", | |
" [ 0., 3., 1., 2.]])\n", | |
"Coordinates:\n", | |
" * x (x) object 'x0' 'x1'\n", | |
" * count (count) object 'first' 'fourth' 'second' 'third'\n", | |
"\n", | |
"\n", | |
"PROPER:\n", | |
"<xarray.DataArray (x: 2, count: 4)>\n", | |
"array([[ 0., 1., 2., 3.],\n", | |
" [ nan, nan, nan, 4.]])\n", | |
"Coordinates:\n", | |
" * x (x) <U2 'x1' 'x0'\n", | |
" * count (count) <U6 'first' 'second' 'third' 'fourth'\n" | |
] | |
} | |
], | |
"source": [ | |
"# Sparse index: 'x0' only has the 'fourth' count, so the unstacked\n", | |
"# array contains NaN for the combinations that are not listed.\n", | |
"demo_unstack([\n", | |
"    ('x1', 'first'),\n", | |
"    ('x1', 'second'),\n", | |
"    ('x1', 'third'),\n", | |
"    ('x1', 'fourth'),\n", | |
"    ('x0', 'fourth'),\n", | |
"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [Root]", | |
"language": "python", | |
"name": "Python [Root]" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment