Skip to content

Instantly share code, notes, and snippets.

Created January 6, 2013 03:35
Show Gist options
  • Save anonymous/4465051 to your computer and use it in GitHub Desktop.
Save anonymous/4465051 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "multiindex_problem"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"import numpy as np\n",
"import sys\n",
"\n",
"print \"\\npandas version:\",pd.__version__"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"pandas version: 0.10.0\n"
]
}
],
"prompt_number": 20
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"define functions to generate 2 and 3 level multi-index based dataframes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def gen_l3_mi(nchs,ndets,ntimes):\n",
" \"generate a 3-level multi-index dataframe\"\n",
" c = ['c'+str(i) for i in range(1,nchs+1)]\n",
" det = ['d'+str(i) for i in range(1,ndets+1)]\n",
" times = ['t'+str(i) for i in range(1,ntimes+1)]\n",
" ind = []\n",
" for ch in c:\n",
" for d in det:\n",
" for t in times:\n",
" ind.append((ch,d,t))\n",
" index = pd.MultiIndex.from_tuples(ind)\n",
" return pd.DataFrame(np.random.randn(nchs*ndets*ntimes,3), \n",
" columns=['A','B','C'],index=index)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def gen_l2_mi(nchs,ndets):\n",
" \"generate a 2-level multi-index dataframe\"\n",
" c = ['c'+str(i) for i in range(1,nchs+1)]\n",
" det = ['d'+str(i) for i in range(1,ndets+1)]\n",
" ind = []\n",
" for ch in c:\n",
" for d in det:\n",
" ind.append((ch,d))\n",
" index = pd.MultiIndex.from_tuples(ind)\n",
" return pd.DataFrame(np.random.randn(nchs*ndets,3), \n",
" columns=['A','B','C'],index=index)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 22
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"example of 2-level frame"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"gen_l2_mi(2,3)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td rowspan=\"3\" valign=\"top\"><strong>c1</strong></td>\n",
" <td><strong>d1</strong></td>\n",
" <td>-0.313954</td>\n",
" <td> 0.342877</td>\n",
" <td>-0.221101</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>d2</strong></td>\n",
" <td>-0.345676</td>\n",
" <td> 1.169511</td>\n",
" <td> 1.904327</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>d3</strong></td>\n",
" <td> 1.480474</td>\n",
" <td>-0.721313</td>\n",
" <td> 0.053794</td>\n",
" </tr>\n",
" <tr>\n",
" <td rowspan=\"3\" valign=\"top\"><strong>c2</strong></td>\n",
" <td><strong>d1</strong></td>\n",
" <td>-0.324620</td>\n",
" <td>-1.462995</td>\n",
" <td> 1.242864</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>d2</strong></td>\n",
" <td>-0.549502</td>\n",
" <td> 0.671936</td>\n",
" <td> 1.502489</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>d3</strong></td>\n",
" <td> 1.805416</td>\n",
" <td> 0.881110</td>\n",
" <td> 3.459964</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 23,
"text": [
" A B C\n",
"c1 d1 -0.313954 0.342877 -0.221101\n",
" d2 -0.345676 1.169511 1.904327\n",
" d3 1.480474 -0.721313 0.053794\n",
"c2 d1 -0.324620 -1.462995 1.242864\n",
" d2 -0.549502 0.671936 1.502489\n",
" d3 1.805416 0.881110 3.459964"
]
}
],
"prompt_number": 23
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"example of 3-level frame"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"gen_l3_mi(2,3,2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td rowspan=\"6\" valign=\"top\"><strong>c1</strong></td>\n",
" <td rowspan=\"2\" valign=\"top\"><strong>d1</strong></td>\n",
" <td><strong>t1</strong></td>\n",
" <td> 0.008900</td>\n",
" <td> 0.823869</td>\n",
" <td> 1.738161</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t2</strong></td>\n",
" <td> 1.057663</td>\n",
" <td> 0.399788</td>\n",
" <td>-0.791813</td>\n",
" </tr>\n",
" <tr>\n",
" <td rowspan=\"2\" valign=\"top\"><strong>d2</strong></td>\n",
" <td><strong>t1</strong></td>\n",
" <td> 1.843533</td>\n",
" <td> 0.584357</td>\n",
" <td>-0.825108</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t2</strong></td>\n",
" <td>-1.064736</td>\n",
" <td> 1.330875</td>\n",
" <td> 1.014513</td>\n",
" </tr>\n",
" <tr>\n",
" <td rowspan=\"2\" valign=\"top\"><strong>d3</strong></td>\n",
" <td><strong>t1</strong></td>\n",
" <td> 0.357935</td>\n",
" <td> 0.485275</td>\n",
" <td>-0.437894</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t2</strong></td>\n",
" <td> 1.155996</td>\n",
" <td>-0.119493</td>\n",
" <td>-0.184729</td>\n",
" </tr>\n",
" <tr>\n",
" <td rowspan=\"6\" valign=\"top\"><strong>c2</strong></td>\n",
" <td rowspan=\"2\" valign=\"top\"><strong>d1</strong></td>\n",
" <td><strong>t1</strong></td>\n",
" <td> 0.523216</td>\n",
" <td>-1.581680</td>\n",
" <td>-1.339100</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t2</strong></td>\n",
" <td>-0.492205</td>\n",
" <td>-0.435085</td>\n",
" <td>-0.068826</td>\n",
" </tr>\n",
" <tr>\n",
" <td rowspan=\"2\" valign=\"top\"><strong>d2</strong></td>\n",
" <td><strong>t1</strong></td>\n",
" <td>-1.287988</td>\n",
" <td> 0.017792</td>\n",
" <td>-0.079597</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t2</strong></td>\n",
" <td>-2.558944</td>\n",
" <td>-1.181913</td>\n",
" <td> 0.215015</td>\n",
" </tr>\n",
" <tr>\n",
" <td rowspan=\"2\" valign=\"top\"><strong>d3</strong></td>\n",
" <td><strong>t1</strong></td>\n",
" <td> 0.402463</td>\n",
" <td> 1.628911</td>\n",
" <td>-0.092130</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t2</strong></td>\n",
" <td>-0.673550</td>\n",
" <td> 1.391845</td>\n",
" <td>-0.026922</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 24,
"text": [
" A B C\n",
"c1 d1 t1 0.008900 0.823869 1.738161\n",
" t2 1.057663 0.399788 -0.791813\n",
" d2 t1 1.843533 0.584357 -0.825108\n",
" t2 -1.064736 1.330875 1.014513\n",
" d3 t1 0.357935 0.485275 -0.437894\n",
" t2 1.155996 -0.119493 -0.184729\n",
"c2 d1 t1 0.523216 -1.581680 -1.339100\n",
" t2 -0.492205 -0.435085 -0.068826\n",
" d2 t1 -1.287988 0.017792 -0.079597\n",
" t2 -2.558944 -1.181913 0.215015\n",
" d3 t1 0.402463 1.628911 -0.092130\n",
" t2 -0.673550 1.391845 -0.026922"
]
}
],
"prompt_number": 24
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"indexing the first 2 levels works fine with these:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"gen_l2_mi(2,3).ix['c1','d3']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 25,
"text": [
"A -0.374957\n",
"B 1.163482\n",
"C -1.436960\n",
"Name: (c1, d3)"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"gen_l3_mi(2,3,2).ix['c1','d3']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td><strong>t1</strong></td>\n",
" <td>-0.933814</td>\n",
" <td>-0.194486</td>\n",
" <td>-1.247901</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t2</strong></td>\n",
" <td>-1.737364</td>\n",
" <td> 0.126783</td>\n",
" <td>-1.676366</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 26,
"text": [
" A B C\n",
"t1 -0.933814 -0.194486 -1.247901\n",
"t2 -1.737364 0.126783 -1.676366"
]
}
],
"prompt_number": 26
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"this works fine until the size of the first two levels reach 9,9, even with the third level getting larger than 10:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"gen_l2_mi(9,9).ix['c9','d9']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 27,
"text": [
"A 0.335086\n",
"B 0.039880\n",
"C 0.082347\n",
"Name: (c9, d9)"
]
}
],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"gen_l3_mi(9,9,12).ix['c9','d5']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td><strong>t1</strong></td>\n",
" <td>-2.628023</td>\n",
" <td> 1.238278</td>\n",
" <td>-1.261069</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t2</strong></td>\n",
" <td> 0.227153</td>\n",
" <td> 1.598195</td>\n",
" <td> 1.813323</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t3</strong></td>\n",
" <td>-1.208046</td>\n",
" <td>-1.225411</td>\n",
" <td>-0.506603</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t4</strong></td>\n",
" <td>-0.404405</td>\n",
" <td>-0.404879</td>\n",
" <td> 0.303644</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t5</strong></td>\n",
" <td> 1.732944</td>\n",
" <td>-0.783047</td>\n",
" <td>-0.168029</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t6</strong></td>\n",
" <td>-0.564203</td>\n",
" <td> 0.665243</td>\n",
" <td> 0.726210</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t7</strong></td>\n",
" <td> 1.238463</td>\n",
" <td> 0.602998</td>\n",
" <td>-0.075746</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t8</strong></td>\n",
" <td> 0.834358</td>\n",
" <td>-1.910082</td>\n",
" <td>-0.560975</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t9</strong></td>\n",
" <td> 1.025414</td>\n",
" <td>-0.564223</td>\n",
" <td>-0.707206</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t10</strong></td>\n",
" <td>-3.086643</td>\n",
" <td> 1.612791</td>\n",
" <td>-1.820178</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t11</strong></td>\n",
" <td>-0.474348</td>\n",
" <td> 1.348884</td>\n",
" <td>-1.031201</td>\n",
" </tr>\n",
" <tr>\n",
" <td><strong>t12</strong></td>\n",
" <td>-0.332214</td>\n",
" <td> 0.027670</td>\n",
" <td>-1.704551</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 28,
"text": [
" A B C\n",
"t1 -2.628023 1.238278 -1.261069\n",
"t2 0.227153 1.598195 1.813323\n",
"t3 -1.208046 -1.225411 -0.506603\n",
"t4 -0.404405 -0.404879 0.303644\n",
"t5 1.732944 -0.783047 -0.168029\n",
"t6 -0.564203 0.665243 0.726210\n",
"t7 1.238463 0.602998 -0.075746\n",
"t8 0.834358 -1.910082 -0.560975\n",
"t9 1.025414 -0.564223 -0.707206\n",
"t10 -3.086643 1.612791 -1.820178\n",
"t11 -0.474348 1.348884 -1.031201\n",
"t12 -0.332214 0.027670 -1.704551"
]
}
],
"prompt_number": 28
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"but when one of the first two levels reaches 10, this indexing does not work anymore for the 3-level deep multi-index dataframe:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" gen_l3_mi(10,9,9).ix['c9','d9']\n",
"except KeyError as e:\n",
" print e"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"u'no item named d9'\n"
]
}
],
"prompt_number": 29
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"while the 2-level index has no problem with that:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"gen_l2_mi(10,9).ix['c9','d5']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 30,
"text": [
"A -0.195208\n",
"B -1.696686\n",
"C -0.784776\n",
"Name: (c9, d5)"
]
}
],
"prompt_number": 30
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### now comes the weird part: the 3-level indexing works again, when the 3rd level size is being indexed as well:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" print gen_l3_mi(10,9,9).ix['c9','d9','t2']\n",
"except KeyError as e:\n",
" print e"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"A 0.502935\n",
"B -0.126213\n",
"C -1.574937\n",
"Name: (c9, d9, t2)\n"
]
}
],
"prompt_number": 31
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### and even weirder (at least the error message:) when indexing the 3rd level to a higher index than available, the 2nd-level index is being announced as not available:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" print gen_l3_mi(10,9,3).ix['c9','d9','t4']\n",
"except KeyError as e:\n",
" print e"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"u'no item named d9'\n"
]
}
],
"prompt_number": 32
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"and this does not change when going back to single digit (i.e. < 10) index sizes:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" print gen_l3_mi(9,9,3).ix['c9','d9','t4']\n",
"except KeyError as e:\n",
" print e"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"u'no item named d9'\n"
]
}
],
"prompt_number": 33
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"When reducing level2 1 under being indexed, the behaviour that the previous index is being announced as the culprit, does not happen:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" print gen_l3_mi(9,8,3).ix['c9','d9','t4']\n",
"except KeyError as e:\n",
" print e"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"u'no item named d9'\n"
]
}
],
"prompt_number": 34
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Using df.sortlevel, the indexing finally works:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" print gen_l3_mi(10,9,9).sortlevel(0).ix['c9','d9']\n",
"except KeyError as e:\n",
" print e"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" A B C\n",
"t1 -0.614408 1.141708 0.635459\n",
"t2 -0.893975 1.855616 -0.284339\n",
"t3 -0.220077 0.257822 -0.804056\n",
"t4 0.072627 -1.292175 -0.310809\n",
"t5 -0.071669 -0.178752 -2.647866\n",
"t6 -0.541183 -2.360437 -1.792694\n",
"t7 -0.831846 -0.490298 0.648153\n",
"t8 1.427516 2.063302 1.050923\n",
"t9 -0.718621 1.216597 1.039047\n"
]
}
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print gen_l3_mi(15,13,12).sortlevel(0).ix[('c9','d9'),'A']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"t1 0.906801\n",
"t10 -0.951765\n",
"t11 -1.538490\n",
"t12 0.345519\n",
"t2 -1.263581\n",
"t3 1.427725\n",
"t4 -0.437448\n",
"t5 1.687768\n",
"t6 1.636617\n",
"t7 -0.484136\n",
"t8 0.260679\n",
"t9 0.861448\n",
"Name: A\n"
]
}
],
"prompt_number": 36
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 36
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment