Skip to content

Instantly share code, notes, and snippets.

@crusaderky
Created July 23, 2016 00:27
Show Gist options
  • Save crusaderky/002ba64ee270164931d32ea3366dce1f to your computer and use it in GitHub Desktop.
Save crusaderky/002ba64ee270164931d32ea3366dce1f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import xarray\n",
"import pandas\n",
"import numpy\n",
"from collections import OrderedDict\n",
"\n",
"def proper_unstack(array, dim):\n",
"    \"\"\"Unstack ``dim``, keeping labels in order of first appearance.\n",
"\n",
"    Work around an issue in xarray that causes the data to be sorted\n",
"    alphabetically by label on unstack():\n",
"\n",
"    https://github.com/pydata/xarray/issues/906\n",
"\n",
"    :param array:\n",
"        xarray object whose coordinate ``dim`` converts to a pandas\n",
"        MultiIndex\n",
"    :param dim:\n",
"        name of the stacked dimension to unstack\n",
"    :returns:\n",
"        unstacked copy of ``array``; the input is not modified.\n",
"        Object-dtype dimension coordinates of the result are converted\n",
"        to strings.\n",
"    \"\"\"\n",
"    # Regenerate Pandas multi-index to be ordered by first appearance\n",
"    mindex = array.coords[dim].to_pandas().index\n",
"\n",
"    # pandas 0.24 renamed MultiIndex.labels to MultiIndex.codes and\n",
"    # pandas 1.0 removed the old name; support both.\n",
"    try:\n",
"        old_codes = mindex.codes\n",
"    except AttributeError:\n",
"        old_codes = mindex.labels\n",
"\n",
"    levels = []\n",
"    codes = []\n",
"    for levels_i, codes_i in zip(mindex.levels, old_codes):\n",
"        # Maps each old code to its new position, in order of appearance\n",
"        level_map = OrderedDict()\n",
"\n",
"        for code in codes_i:\n",
"            # -1 marks a missing value; it is not a position in levels_i\n",
"            if code != -1 and code not in level_map:\n",
"                level_map[code] = len(level_map)\n",
"\n",
"        levels.append([levels_i[k] for k in level_map])\n",
"        codes.append([level_map.get(k, -1) for k in codes_i])\n",
"\n",
"    # The codes are passed positionally because the keyword is called\n",
"    # `labels` before pandas 0.24 and `codes` afterwards.\n",
"    mindex = pandas.MultiIndex(levels, codes, names=mindex.names)\n",
"    array = array.copy()\n",
"    array.coords[dim] = mindex\n",
"\n",
"    # Invoke builtin unstack\n",
"    array = array.unstack(dim)\n",
"\n",
"    # Convert object coords to string\n",
"    for name in array.dims:\n",
"        # Skip dimensions that carry no coordinate variable\n",
"        if name not in array.coords:\n",
"            continue\n",
"        coord = array.coords[name]\n",
"        if coord.dtype == numpy.dtype('O'):\n",
"            array.coords[name] = coord.values.astype(str)\n",
"\n",
"    return array"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"def demo_unstack(index):\n",
"    \"\"\"Print a demo array stacked, then unstacked in two ways.\n",
"\n",
"    :param index:\n",
"        sequence of (x, count) label pairs, used to build the\n",
"        MultiIndex of a series holding the values 0, 1, 2, ...\n",
"\n",
"    Prints the stacked DataArray, the result of xarray's own unstack()\n",
"    and the result of proper_unstack().\n",
"    \"\"\"\n",
"    mindex = pandas.MultiIndex.from_tuples(index, names=['x', 'count'])\n",
"    series = pandas.Series(list(range(len(mindex))), mindex)\n",
"    stacked = xarray.DataArray(series)\n",
"\n",
"    # Each view is built lazily, right before it is printed, so the\n",
"    # output appears in the same order as the computations run.\n",
"    views = (\n",
"        (\"STACKED:\", lambda: stacked),\n",
"        (\"\\n\\nBUILTIN:\", lambda: stacked.unstack('dim_0')),\n",
"        (\"\\n\\nPROPER:\", lambda: proper_unstack(stacked, 'dim_0')),\n",
"    )\n",
"    for heading, build in views:\n",
"        print(heading)\n",
"        print(build())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"STACKED:\n",
"<xarray.DataArray (dim_0: 8)>\n",
"array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)\n",
"Coordinates:\n",
" * dim_0 (dim_0) object ('x1', 'first') ('x1', 'second') ('x1', 'third') ...\n",
"\n",
"\n",
"BUILTIN:\n",
"<xarray.DataArray (x: 2, count: 4)>\n",
"array([[4, 7, 5, 6],\n",
" [0, 3, 1, 2]], dtype=int64)\n",
"Coordinates:\n",
" * x (x) object 'x0' 'x1'\n",
" * count (count) object 'first' 'fourth' 'second' 'third'\n",
"\n",
"\n",
"PROPER:\n",
"<xarray.DataArray (x: 2, count: 4)>\n",
"array([[0, 1, 2, 3],\n",
" [4, 5, 6, 7]], dtype=int64)\n",
"Coordinates:\n",
" * x (x) <U2 'x1' 'x0'\n",
" * count (count) <U6 'first' 'second' 'third' 'fourth'\n"
]
}
],
"source": [
"# Both x values carry all four counts; 'x1' deliberately comes first\n",
"demo_unstack([\n",
"    (x, count)\n",
"    for x in ('x1', 'x0')\n",
"    for count in ('first', 'second', 'third', 'fourth')\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"STACKED:\n",
"<xarray.DataArray (dim_0: 8)>\n",
"array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)\n",
"Coordinates:\n",
" * dim_0 (dim_0) object ('x0', 'first') ('x0', 'second') ('x0', 'third') ...\n",
"\n",
"\n",
"BUILTIN:\n",
"<xarray.DataArray (x: 2, count: 4)>\n",
"array([[0, 3, 1, 2],\n",
" [7, 4, 6, 5]], dtype=int64)\n",
"Coordinates:\n",
" * x (x) object 'x0' 'x1'\n",
" * count (count) object 'first' 'fourth' 'second' 'third'\n",
"\n",
"\n",
"PROPER:\n",
"<xarray.DataArray (x: 2, count: 4)>\n",
"array([[0, 1, 2, 3],\n",
" [7, 6, 5, 4]], dtype=int64)\n",
"Coordinates:\n",
" * x (x) <U2 'x0' 'x1'\n",
" * count (count) <U6 'first' 'second' 'third' 'fourth'\n"
]
}
],
"source": [
"# 'x0' lists the counts forwards, 'x1' lists them backwards\n",
"demo_unstack(\n",
"    [('x0', count) for count in ('first', 'second', 'third', 'fourth')]\n",
"    + [('x1', count) for count in ('fourth', 'third', 'second', 'first')]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"STACKED:\n",
"<xarray.DataArray (dim_0: 5)>\n",
"array([0, 1, 2, 3, 4], dtype=int64)\n",
"Coordinates:\n",
" * dim_0 (dim_0) object ('x1', 'first') ('x1', 'second') ('x1', 'third') ...\n",
"\n",
"\n",
"BUILTIN:\n",
"<xarray.DataArray (x: 2, count: 4)>\n",
"array([[ nan, 4., nan, nan],\n",
" [ 0., 3., 1., 2.]])\n",
"Coordinates:\n",
" * x (x) object 'x0' 'x1'\n",
" * count (count) object 'first' 'fourth' 'second' 'third'\n",
"\n",
"\n",
"PROPER:\n",
"<xarray.DataArray (x: 2, count: 4)>\n",
"array([[ 0., 1., 2., 3.],\n",
" [ nan, nan, nan, 4.]])\n",
"Coordinates:\n",
" * x (x) <U2 'x1' 'x0'\n",
" * count (count) <U6 'first' 'second' 'third' 'fourth'\n"
]
}
],
"source": [
"# 'x0' only has a value for 'fourth', so the unstacked result has gaps\n",
"demo_unstack(\n",
"    [('x1', count) for count in ('first', 'second', 'third', 'fourth')]\n",
"    + [('x0', 'fourth')]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [Root]",
"language": "python",
"name": "Python [Root]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment