Created
December 26, 2012 18:19
-
-
Save phobson/4381998 to your computer and use it in GitHub Desktop.
How to un-crosstab an excel file using pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "Uncrosstab Demo" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### This cell is the bare bones example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas\n", | |
"excel_file = pandas.ExcelFile('xtab_demo.xlsx')\n", | |
"xtab_data = excel_file.parse('Sheet1', index_col=0)\n", | |
"flat_data = pandas.DataFrame(xtab_data.stack()) # un-crosstab the data\n", | |
"flat_data.to_excel('flat_data.xlsx')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"c:\\Python27\\lib\\site-packages\\pytz\\__init__.py:35: UserWarning: Module IPython was already imported from c:\\users\\phobson\\work\\ipython\\IPython\\__init__.pyc, but c:\\python27\\lib\\site-packages\\ipython-0.13.1.dev-py2.7.egg is being added to sys.path\n", | |
" from pkg_resources import resource_stream\n" | |
] | |
} | |
], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Original data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"xtab_data.head(5) # show the top 5 rows " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Iron</th>\n", | |
" <th>Manganese</th>\n", | |
" <th>Copper </th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td><strong>2012-12-01</strong></td>\n", | |
" <td> 0.17</td>\n", | |
" <td> 5.95</td>\n", | |
" <td> 0.84</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>2012-12-02</strong></td>\n", | |
" <td> 3.25</td>\n", | |
" <td> 2.14</td>\n", | |
" <td> 3.32</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>2012-12-03</strong></td>\n", | |
" <td> 0.92</td>\n", | |
" <td> 3.11</td>\n", | |
" <td> 1.14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>2012-12-04</strong></td>\n", | |
" <td> 1.07</td>\n", | |
" <td> 2.57</td>\n", | |
" <td> 2.29</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>2012-12-05</strong></td>\n", | |
" <td> 1.00</td>\n", | |
" <td> 3.74</td>\n", | |
" <td> 0.05</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 2, | |
"text": [ | |
" Iron Manganese Copper \n", | |
"Date \n", | |
"2012-12-01 0.17 5.95 0.84\n", | |
"2012-12-02 3.25 2.14 3.32\n", | |
"2012-12-03 0.92 3.11 1.14\n", | |
"2012-12-04 1.07 2.57 2.29\n", | |
"2012-12-05 1.00 3.74 0.05" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Un-crosstabbed data\n", | |
"(note: the date column of the excel file is filled in)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"flat_data.index.names = ['Date', 'Parameter'] # totally optional\n", | |
"flat_data.rename(columns={0:'Concentration'}, inplace=True) # option too\n", | |
"flat_data.to_excel('flat_data.xlsx')\n", | |
"flat_data.head(15) # top 15 rows" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Concentration</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th>Parameter</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td rowspan=\"3\" valign=\"top\"><strong>2012-12-01</strong></td>\n", | |
" <td><strong>Iron</strong></td>\n", | |
" <td> 0.17</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Manganese</strong></td>\n", | |
" <td> 5.95</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Copper </strong></td>\n", | |
" <td> 0.84</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td rowspan=\"3\" valign=\"top\"><strong>2012-12-02</strong></td>\n", | |
" <td><strong>Iron</strong></td>\n", | |
" <td> 3.25</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Manganese</strong></td>\n", | |
" <td> 2.14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Copper </strong></td>\n", | |
" <td> 3.32</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td rowspan=\"3\" valign=\"top\"><strong>2012-12-03</strong></td>\n", | |
" <td><strong>Iron</strong></td>\n", | |
" <td> 0.92</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Manganese</strong></td>\n", | |
" <td> 3.11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Copper </strong></td>\n", | |
" <td> 1.14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td rowspan=\"3\" valign=\"top\"><strong>2012-12-04</strong></td>\n", | |
" <td><strong>Iron</strong></td>\n", | |
" <td> 1.07</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Manganese</strong></td>\n", | |
" <td> 2.57</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Copper </strong></td>\n", | |
" <td> 2.29</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td rowspan=\"3\" valign=\"top\"><strong>2012-12-05</strong></td>\n", | |
" <td><strong>Iron</strong></td>\n", | |
" <td> 1.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Manganese</strong></td>\n", | |
" <td> 3.74</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td><strong>Copper </strong></td>\n", | |
" <td> 0.05</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
" Concentration\n", | |
"Date Parameter \n", | |
"2012-12-01 Iron 0.17\n", | |
" Manganese 5.95\n", | |
" Copper 0.84\n", | |
"2012-12-02 Iron 3.25\n", | |
" Manganese 2.14\n", | |
" Copper 3.32\n", | |
"2012-12-03 Iron 0.92\n", | |
" Manganese 3.11\n", | |
" Copper 1.14\n", | |
"2012-12-04 Iron 1.07\n", | |
" Manganese 2.57\n", | |
" Copper 2.29\n", | |
"2012-12-05 Iron 1.00\n", | |
" Manganese 3.74\n", | |
" Copper 0.05" | |
] | |
} | |
], | |
"prompt_number": 3 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment