Created
July 21, 2017 23:36
-
-
Save baldwint/32b570ac47dc22405516b2e807b7b013 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'0.21.0.dev+286.g4efe6560e'" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.__version__" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Pandas stacking bug\n", | |
"\n", | |
"I set out to build a differencing tool for dataframes, and ran into some unexpected behavior in Pandas." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## example dataframes\n", | |
"\n", | |
"Construct two dataframes (of object type) that are similar excpet for some missing rows and some changed values." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"ix = pd.Index(['one', 'two', 'three', 'four',\n", | |
" 'five', 'six', 'seven', 'eight'], name='i1')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"cols = pd.Index(['class1', 'class2', 'class3', 'class4'], name='c1')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"l = len(ix)\n", | |
"w = len(cols)\n", | |
"dt = pd.np.arange(l*w).reshape((l,w)).astype(str)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"dt[-2, :] = None # or pd.np.nan" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>c1</th>\n", | |
" <th>class1</th>\n", | |
" <th>class2</th>\n", | |
" <th>class3</th>\n", | |
" <th>class4</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>i1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>four</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>five</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>six</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>seven</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>eight</th>\n", | |
" <td>28</td>\n", | |
" <td>29</td>\n", | |
" <td>30</td>\n", | |
" <td>31</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"c1 class1 class2 class3 class4\n", | |
"i1 \n", | |
"one 0 1 2 3\n", | |
"two 4 5 6 7\n", | |
"three 8 9 10 11\n", | |
"four 12 13 14 15\n", | |
"five 16 17 18 19\n", | |
"six 20 21 22 23\n", | |
"seven None None None None\n", | |
"eight 28 29 30 31" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"eg1 = pd.DataFrame(data=dt, columns=cols, index=ix)\n", | |
"eg2 = eg1.copy(deep=True)\n", | |
"eg1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"c1\n", | |
"class1 object\n", | |
"class2 object\n", | |
"class3 object\n", | |
"class4 object\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"eg1.dtypes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# introduce differences to find\n", | |
"eg1 = eg1.drop('six').set_value('two', 'class2', -1)\n", | |
"eg2 = eg2.drop('one').set_value('seven', 'class3', -1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>c1</th>\n", | |
" <th>class1</th>\n", | |
" <th>class2</th>\n", | |
" <th>class3</th>\n", | |
" <th>class4</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>i1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td>4</td>\n", | |
" <td>-1</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>four</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>five</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>seven</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>eight</th>\n", | |
" <td>28</td>\n", | |
" <td>29</td>\n", | |
" <td>30</td>\n", | |
" <td>31</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"c1 class1 class2 class3 class4\n", | |
"i1 \n", | |
"one 0 1 2 3\n", | |
"two 4 -1 6 7\n", | |
"three 8 9 10 11\n", | |
"four 12 13 14 15\n", | |
"five 16 17 18 19\n", | |
"seven None None None None\n", | |
"eight 28 29 30 31" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"eg1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>c1</th>\n", | |
" <th>class1</th>\n", | |
" <th>class2</th>\n", | |
" <th>class3</th>\n", | |
" <th>class4</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>i1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>four</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>five</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>six</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>seven</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>-1</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>eight</th>\n", | |
" <td>28</td>\n", | |
" <td>29</td>\n", | |
" <td>30</td>\n", | |
" <td>31</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"c1 class1 class2 class3 class4\n", | |
"i1 \n", | |
"two 4 5 6 7\n", | |
"three 8 9 10 11\n", | |
"four 12 13 14 15\n", | |
"five 16 17 18 19\n", | |
"six 20 21 22 23\n", | |
"seven None None -1 None\n", | |
"eight 28 29 30 31" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"eg2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## perform differencing\n", | |
"\n", | |
"by concatenating dataframes together and dropping duplicate rows." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['class1', 'class2', 'class3', 'class4']" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"list(cols)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"eg1['Source'] = 'eg1'\n", | |
"eg2['Source'] = 'eg2'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['i1', 'class1', 'class2', 'class3', 'class4']" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"index_name = eg1.index.name\n", | |
"assert index_name == eg2.index.name\n", | |
"\n", | |
"cmp_cols = [index_name, ] + list(cols)\n", | |
"cmp_cols" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"unm = pd.concat(\n", | |
" (eg1, eg2)\n", | |
").reset_index().drop_duplicates(\n", | |
" keep=False,\n", | |
" subset=cmp_cols,\n", | |
").set_index(index_name)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>c1</th>\n", | |
" <th>class1</th>\n", | |
" <th>class2</th>\n", | |
" <th>class3</th>\n", | |
" <th>class4</th>\n", | |
" <th>Source</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>i1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>eg1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td>4</td>\n", | |
" <td>-1</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" <td>eg1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>seven</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>eg1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" <td>eg2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>six</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" <td>eg2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>seven</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>-1</td>\n", | |
" <td>None</td>\n", | |
" <td>eg2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"c1 class1 class2 class3 class4 Source\n", | |
"i1 \n", | |
"one 0 1 2 3 eg1\n", | |
"two 4 -1 6 7 eg1\n", | |
"seven None None None None eg1\n", | |
"two 4 5 6 7 eg2\n", | |
"six 20 21 22 23 eg2\n", | |
"seven None None -1 None eg2" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"unm" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This \"unmatched\" frame includes the orphaned rows on either side, and a pair of rows for each changed row. We can pivot this on `Source` to view a side-by-side comparison within each column.\n", | |
"\n", | |
"To retain information about which rows were added/deleted, as opposed to being changed to or from a literal null, add an \"Exists\" flag before pivoting:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"unm['Exists'] = True" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"perform the pivot:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"egp = unm.set_index(['Source'], append=True).unstack()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"unpaired rows (for additions or deletions) will now contain NaN values in one of the \"Exists\" columns. Fill those with False:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# blank values in this column pair indicate that the row didn't exist there\n", | |
"egp.Exists = egp.Exists.fillna(False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th>c1</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class1</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class2</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class3</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class4</th>\n", | |
" <th colspan=\"2\" halign=\"left\">Exists</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Source</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>i1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td>0</td>\n", | |
" <td>None</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" <td>2</td>\n", | |
" <td>None</td>\n", | |
" <td>3</td>\n", | |
" <td>None</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>seven</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>-1</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>six</th>\n", | |
" <td>None</td>\n", | |
" <td>20</td>\n", | |
" <td>None</td>\n", | |
" <td>21</td>\n", | |
" <td>None</td>\n", | |
" <td>22</td>\n", | |
" <td>None</td>\n", | |
" <td>23</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>-1</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" <td>7</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"c1 class1 class2 class3 class4 Exists \n", | |
"Source eg1 eg2 eg1 eg2 eg1 eg2 eg1 eg2 eg1 eg2\n", | |
"i1 \n", | |
"one 0 None 1 None 2 None 3 None True False\n", | |
"seven None None None None None -1 None None True True\n", | |
"six None 20 None 21 None 22 None 23 False True\n", | |
"two 4 4 -1 5 6 6 7 7 True True" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"egp" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"It'd be nicer to combine the \"Exists\" columns by human-readable labels:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"chtype = {\n", | |
" (True, True): \"Change\",\n", | |
" (True, False): \"Delete\",\n", | |
" (False, True): \"Add\",\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"change_type = egp.Exists.apply(tuple, axis=1).map(chtype) #.astype('category')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"i1\n", | |
"one Delete\n", | |
"seven Change\n", | |
"six Add\n", | |
"two Change\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"change_type" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"make a human-readable version of this:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"egh = egp.copy(deep=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"egh['Change Type'] = change_type" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th>c1</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class1</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class2</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class3</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class4</th>\n", | |
" <th colspan=\"2\" halign=\"left\">Exists</th>\n", | |
" <th>Change Type</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Source</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>i1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td>0</td>\n", | |
" <td>None</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" <td>2</td>\n", | |
" <td>None</td>\n", | |
" <td>3</td>\n", | |
" <td>None</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>Delete</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>seven</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>-1</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>Change</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>six</th>\n", | |
" <td>None</td>\n", | |
" <td>20</td>\n", | |
" <td>None</td>\n", | |
" <td>21</td>\n", | |
" <td>None</td>\n", | |
" <td>22</td>\n", | |
" <td>None</td>\n", | |
" <td>23</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>Add</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>-1</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" <td>7</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>Change</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"c1 class1 class2 class3 class4 Exists \\\n", | |
"Source eg1 eg2 eg1 eg2 eg1 eg2 eg1 eg2 eg1 eg2 \n", | |
"i1 \n", | |
"one 0 None 1 None 2 None 3 None True False \n", | |
"seven None None None None None -1 None None True True \n", | |
"six None 20 None 21 None 22 None 23 False True \n", | |
"two 4 4 -1 5 6 6 7 7 True True \n", | |
"\n", | |
"c1 Change Type \n", | |
"Source \n", | |
"i1 \n", | |
"one Delete \n", | |
"seven Change \n", | |
"six Add \n", | |
"two Change " | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"egh" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"put it in the index:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>c1</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class1</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class2</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class3</th>\n", | |
" <th colspan=\"2\" halign=\"left\">class4</th>\n", | |
" <th colspan=\"2\" halign=\"left\">Exists</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>Source</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Change Type</th>\n", | |
" <th>i1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Delete</th>\n", | |
" <th>one</th>\n", | |
" <td>0</td>\n", | |
" <td>None</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" <td>2</td>\n", | |
" <td>None</td>\n", | |
" <td>3</td>\n", | |
" <td>None</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Change</th>\n", | |
" <th>seven</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>-1</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Add</th>\n", | |
" <th>six</th>\n", | |
" <td>None</td>\n", | |
" <td>20</td>\n", | |
" <td>None</td>\n", | |
" <td>21</td>\n", | |
" <td>None</td>\n", | |
" <td>22</td>\n", | |
" <td>None</td>\n", | |
" <td>23</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Change</th>\n", | |
" <th>two</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>-1</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" <td>7</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"c1 class1 class2 class3 class4 Exists \\\n", | |
"Source eg1 eg2 eg1 eg2 eg1 eg2 eg1 eg2 eg1 \n", | |
"Change Type i1 \n", | |
"Delete one 0 None 1 None 2 None 3 None True \n", | |
"Change seven None None None None None -1 None None True \n", | |
"Add six None 20 None 21 None 22 None 23 False \n", | |
"Change two 4 4 -1 5 6 6 7 7 True \n", | |
"\n", | |
"c1 \n", | |
"Source eg2 \n", | |
"Change Type i1 \n", | |
"Delete one False \n", | |
"Change seven True \n", | |
"Add six True \n", | |
"Change two True " | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"egh = egh.reset_index().set_index(['Change Type', 'i1'])\n", | |
"egh.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This is nice and readable, but by unstacking it we could focus on just the columns that had changes:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Source</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Change Type</th>\n", | |
" <th>i1</th>\n", | |
" <th>c1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">Delete</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">one</th>\n", | |
" <th>Exists</th>\n", | |
" <td>0</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class1</th>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class2</th>\n", | |
" <td>2</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class3</th>\n", | |
" <td>3</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class4</th>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">Change</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">seven</th>\n", | |
" <th>Exists</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class1</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class2</th>\n", | |
" <td>None</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class3</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class4</th>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">Add</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">six</th>\n", | |
" <th>Exists</th>\n", | |
" <td>None</td>\n", | |
" <td>20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class1</th>\n", | |
" <td>None</td>\n", | |
" <td>21</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class2</th>\n", | |
" <td>None</td>\n", | |
" <td>22</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class3</th>\n", | |
" <td>None</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class4</th>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">Change</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">two</th>\n", | |
" <th>Exists</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class1</th>\n", | |
" <td>-1</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class2</th>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class3</th>\n", | |
" <td>7</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class4</th>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Source eg1 eg2\n", | |
"Change Type i1 c1 \n", | |
"Delete one Exists 0 None\n", | |
" class1 1 None\n", | |
" class2 2 None\n", | |
" class3 3 None\n", | |
" class4 True False\n", | |
"Change seven Exists None None\n", | |
" class1 None None\n", | |
" class2 None -1\n", | |
" class3 None None\n", | |
" class4 True True\n", | |
"Add six Exists None 20\n", | |
" class1 None 21\n", | |
" class2 None 22\n", | |
" class3 None 23\n", | |
" class4 False True\n", | |
"Change two Exists 4 4\n", | |
" class1 -1 5\n", | |
" class2 6 6\n", | |
" class3 7 7\n", | |
" class4 True True" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"egh.stack(level=0, dropna=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"there is a bug here.\n", | |
"\n", | |
" - the innermost index values are rotated by one with respect to the data\n", | |
"\n", | |
"In another instance, I observed a stranger version of this bug where\n", | |
"\n", | |
" - `class1` is missing\n", | |
" - the rest of the innermost index values are shifted up\n", | |
" - the label for one of the index levels (`Change Type`) shows up as the final innermost index value\n", | |
" \n", | |
"the data seems to all be in the right place but the indexing is incorrect." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Workaround\n", | |
"\n", | |
"Per Pandas bug [16925](https://github.com/pandas-dev/pandas/issues/16925), sorting the columns fixes it:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Source</th>\n", | |
" <th>eg1</th>\n", | |
" <th>eg2</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Change Type</th>\n", | |
" <th>i1</th>\n", | |
" <th>c1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">Delete</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">one</th>\n", | |
" <th>Exists</th>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class1</th>\n", | |
" <td>0</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class2</th>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class3</th>\n", | |
" <td>2</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class4</th>\n", | |
" <td>3</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">Change</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">seven</th>\n", | |
" <th>Exists</th>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class1</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class2</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class3</th>\n", | |
" <td>None</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class4</th>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">Add</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">six</th>\n", | |
" <th>Exists</th>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class1</th>\n", | |
" <td>None</td>\n", | |
" <td>20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class2</th>\n", | |
" <td>None</td>\n", | |
" <td>21</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class3</th>\n", | |
" <td>None</td>\n", | |
" <td>22</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class4</th>\n", | |
" <td>None</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">Change</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">two</th>\n", | |
" <th>Exists</th>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class1</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class2</th>\n", | |
" <td>-1</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class3</th>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>class4</th>\n", | |
" <td>7</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Source eg1 eg2\n", | |
"Change Type i1 c1 \n", | |
"Delete one Exists True False\n", | |
" class1 0 None\n", | |
" class2 1 None\n", | |
" class3 2 None\n", | |
" class4 3 None\n", | |
"Change seven Exists True True\n", | |
" class1 None None\n", | |
" class2 None None\n", | |
" class3 None -1\n", | |
" class4 None None\n", | |
"Add six Exists False True\n", | |
" class1 None 20\n", | |
" class2 None 21\n", | |
" class3 None 22\n", | |
" class4 None 23\n", | |
"Change two Exists True True\n", | |
" class1 4 4\n", | |
" class2 -1 5\n", | |
" class3 6 6\n", | |
" class4 7 7" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"egh.sort_index(axis=1).stack(level=0, dropna=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"INSTALLED VERSIONS\n", | |
"------------------\n", | |
"commit: None\n", | |
"python: 3.6.2.final.0\n", | |
"python-bits: 64\n", | |
"OS: Darwin\n", | |
"OS-release: 16.6.0\n", | |
"machine: x86_64\n", | |
"processor: i386\n", | |
"byteorder: little\n", | |
"LC_ALL: None\n", | |
"LANG: en_US.UTF-8\n", | |
"LOCALE: en_US.UTF-8\n", | |
"\n", | |
"pandas: 0.21.0.dev+286.g4efe6560e\n", | |
"pytest: None\n", | |
"pip: 9.0.1\n", | |
"setuptools: 27.2.0\n", | |
"Cython: 0.25.2\n", | |
"numpy: 1.13.1\n", | |
"scipy: None\n", | |
"xarray: None\n", | |
"IPython: 6.1.0\n", | |
"sphinx: None\n", | |
"patsy: None\n", | |
"dateutil: 2.6.1\n", | |
"pytz: 2017.2\n", | |
"blosc: None\n", | |
"bottleneck: None\n", | |
"tables: None\n", | |
"numexpr: None\n", | |
"feather: None\n", | |
"matplotlib: None\n", | |
"openpyxl: None\n", | |
"xlrd: None\n", | |
"xlwt: None\n", | |
"xlsxwriter: None\n", | |
"lxml: None\n", | |
"bs4: None\n", | |
"html5lib: None\n", | |
"sqlalchemy: None\n", | |
"pymysql: None\n", | |
"psycopg2: None\n", | |
"jinja2: None\n", | |
"s3fs: None\n", | |
"pandas_gbq: None\n", | |
"pandas_datareader: None\n" | |
] | |
} | |
], | |
"source": [ | |
"pd.show_versions()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Environment (conda_pandasdev)", | |
"language": "python", | |
"name": "conda_pandasdev" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment