Created
August 3, 2017 05:01
-
-
Save jakevdp/73e0589562cf4a07b191d0659085e4f9 to your computer and use it in GitHub Desktop.
This didn't fit into a tweet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Pandas snippet for Roger Andre\n", | |
| "\n", | |
| "cf. https://twitter.com/lagerratrobe/status/892966724454055936" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Data Files" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Overwriting 2007_data.csv\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%file 2007_data.csv\n", | |
| "GEOBASID|STNAME|YEAR|AAWDT\n", | |
| "60150010|COLUMBIA ST ON RP|2007|9500\n", | |
| "65700010|SENECA ST OFF RP|2007|6800\n", | |
| "96200010|WESTERN ST ON RP|2007|7200\n", | |
| "104300010|WESTERN ST OFF RP|2007|6500" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Overwriting 2008_data.csv\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%file 2008_data.csv\n", | |
| "GEOBASID|STNAME|YEAR|AAWDT\n", | |
| "60150010|COLUMBIA ST ON RP|2008|9510\n", | |
| "65700010|SENECA ST OFF RP|2008|6810\n", | |
| "96200010|WESTERN ST ON RP|2008|7210\n", | |
| "104300010|WESTERN ST OFF RP|2008|6510" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Overwriting 2009_data.csv\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%file 2009_data.csv\n", | |
| "GEOBASID|STNAME|YEAR|AAWDT\n", | |
| "60150010|COLUMBIA ST ON RP|2009|9520\n", | |
| "65700010|SENECA ST OFF RP|2009|6820\n", | |
| "96200010|WESTERN ST ON RP|2009|7220\n", | |
| "104300010|WESTERN ST OFF RP|2009|6520" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Overwriting 2010_data.csv\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%file 2010_data.csv\n", | |
| "GEOBASID|STNAME|YEAR|AAWDT\n", | |
| "60150010|COLUMBIA ST ON RP|2010|9530\n", | |
| "65700010|SENECA ST OFF RP|2010|6830\n", | |
| "96200010|WESTERN ST ON RP|2010|7230\n", | |
| "104300010|WESTERN ST OFF RP|2010|6530" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Reading and Concatenating" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Concatenate data together in \"tidy\" format. To be honest, I'd stop here, because this is probably the cleanest way of storing all the data:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>GEOBASID</th>\n", | |
| " <th>STNAME</th>\n", | |
| " <th>YEAR</th>\n", | |
| " <th>AAWDT</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>60150010</td>\n", | |
| " <td>COLUMBIA ST ON RP</td>\n", | |
| " <td>2007</td>\n", | |
| " <td>9500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>65700010</td>\n", | |
| " <td>SENECA ST OFF RP</td>\n", | |
| " <td>2007</td>\n", | |
| " <td>6800</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>96200010</td>\n", | |
| " <td>WESTERN ST ON RP</td>\n", | |
| " <td>2007</td>\n", | |
| " <td>7200</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>104300010</td>\n", | |
| " <td>WESTERN ST OFF RP</td>\n", | |
| " <td>2007</td>\n", | |
| " <td>6500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>60150010</td>\n", | |
| " <td>COLUMBIA ST ON RP</td>\n", | |
| " <td>2008</td>\n", | |
| " <td>9510</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " GEOBASID STNAME YEAR AAWDT\n", | |
| "0 60150010 COLUMBIA ST ON RP 2007 9500\n", | |
| "1 65700010 SENECA ST OFF RP 2007 6800\n", | |
| "2 96200010 WESTERN ST ON RP 2007 7200\n", | |
| "3 104300010 WESTERN ST OFF RP 2007 6500\n", | |
| "0 60150010 COLUMBIA ST ON RP 2008 9510" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "\n", | |
| "data = pd.concat([pd.read_csv('{0}_data.csv'.format(year), sep='|')\n", | |
| " for year in range(2007, 2011)])\n", | |
| "data.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Pivoting" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "If you want years as columns, using a pivot table is what you're looking for:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>YEAR</th>\n", | |
| " <th>2007</th>\n", | |
| " <th>2008</th>\n", | |
| " <th>2009</th>\n", | |
| " <th>2010</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>GEOBASID</th>\n", | |
| " <th>STNAME</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>60150010</th>\n", | |
| " <th>COLUMBIA ST ON RP</th>\n", | |
| " <td>9500</td>\n", | |
| " <td>9510</td>\n", | |
| " <td>9520</td>\n", | |
| " <td>9530</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>65700010</th>\n", | |
| " <th>SENECA ST OFF RP</th>\n", | |
| " <td>6800</td>\n", | |
| " <td>6810</td>\n", | |
| " <td>6820</td>\n", | |
| " <td>6830</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>96200010</th>\n", | |
| " <th>WESTERN ST ON RP</th>\n", | |
| " <td>7200</td>\n", | |
| " <td>7210</td>\n", | |
| " <td>7220</td>\n", | |
| " <td>7230</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>104300010</th>\n", | |
| " <th>WESTERN ST OFF RP</th>\n", | |
| " <td>6500</td>\n", | |
| " <td>6510</td>\n", | |
| " <td>6520</td>\n", | |
| " <td>6530</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "YEAR 2007 2008 2009 2010\n", | |
| "GEOBASID STNAME \n", | |
| "60150010 COLUMBIA ST ON RP 9500 9510 9520 9530\n", | |
| "65700010 SENECA ST OFF RP 6800 6810 6820 6830\n", | |
| "96200010 WESTERN ST ON RP 7200 7210 7220 7230\n", | |
| "104300010 WESTERN ST OFF RP 6500 6510 6520 6530" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pivoted = data.pivot_table(values='AAWDT', index=['GEOBASID', 'STNAME'], columns='YEAR')\n", | |
| "pivoted" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "If you want to turn the indices into normal columns, you can do this:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>YEAR</th>\n", | |
| " <th>GEOBASID</th>\n", | |
| " <th>STNAME</th>\n", | |
| " <th>2007</th>\n", | |
| " <th>2008</th>\n", | |
| " <th>2009</th>\n", | |
| " <th>2010</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>60150010</td>\n", | |
| " <td>COLUMBIA ST ON RP</td>\n", | |
| " <td>9500</td>\n", | |
| " <td>9510</td>\n", | |
| " <td>9520</td>\n", | |
| " <td>9530</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>65700010</td>\n", | |
| " <td>SENECA ST OFF RP</td>\n", | |
| " <td>6800</td>\n", | |
| " <td>6810</td>\n", | |
| " <td>6820</td>\n", | |
| " <td>6830</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>96200010</td>\n", | |
| " <td>WESTERN ST ON RP</td>\n", | |
| " <td>7200</td>\n", | |
| " <td>7210</td>\n", | |
| " <td>7220</td>\n", | |
| " <td>7230</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>104300010</td>\n", | |
| " <td>WESTERN ST OFF RP</td>\n", | |
| " <td>6500</td>\n", | |
| " <td>6510</td>\n", | |
| " <td>6520</td>\n", | |
| " <td>6530</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "YEAR GEOBASID STNAME 2007 2008 2009 2010\n", | |
| "0 60150010 COLUMBIA ST ON RP 9500 9510 9520 9530\n", | |
| "1 65700010 SENECA ST OFF RP 6800 6810 6820 6830\n", | |
| "2 96200010 WESTERN ST ON RP 7200 7210 7220 7230\n", | |
| "3 104300010 WESTERN ST OFF RP 6500 6510 6520 6530" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pivoted.reset_index()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "anaconda-cloud": {}, | |
| "kernelspec": { | |
| "display_name": "Python 3.6", | |
| "language": "python", | |
| "name": "python3.6" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.0" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment