Skip to content

Instantly share code, notes, and snippets.

@phobson
Last active December 15, 2015 19:49
Show Gist options
  • Save phobson/5314570 to your computer and use it in GitHub Desktop.
Save phobson/5314570 to your computer and use it in GitHub Desktop.
Units standardization using pandas. View it here: http://nbviewer.ipython.org/5314570
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "units conversion"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Import and units look-up dictionary"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas\n",
"import StringIO\n",
"units_map = {\n",
" 'ug/L': 1e-6,\n",
" 'mg/L': 1e-3,\n",
" 'g/L' : 1e+0,\n",
"}"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Mock up some data, define the target units"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data_csv = StringIO.StringIO(\"\"\"\\\n",
"storm,param,station,units,conc\n",
"1,lead,inflow,ug/L,10\n",
"2,lead,inflow,mg/L,0.02\n",
"3,lead,inflow,g/L,0.00003\n",
"4,lead,inflow,ug/L,40\n",
"1,lead,outflow,mg/L,0.05\n",
"2,lead,outflow,ug/L,60\n",
"3,lead,outflow,ug/L,70\n",
"4,lead,outflow,g/L,0.00008\"\"\")\n",
"data = pandas.read_csv(data_csv)\n",
"\n",
"target_units = 'ug/L'\n",
"data"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>storm</th>\n",
" <th>param</th>\n",
" <th>station</th>\n",
" <th>units</th>\n",
" <th>conc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1</td>\n",
" <td> lead</td>\n",
" <td> inflow</td>\n",
" <td> ug/L</td>\n",
" <td> 10.00000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 2</td>\n",
" <td> lead</td>\n",
" <td> inflow</td>\n",
" <td> mg/L</td>\n",
" <td> 0.02000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 3</td>\n",
" <td> lead</td>\n",
" <td> inflow</td>\n",
" <td> g/L</td>\n",
" <td> 0.00003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 4</td>\n",
" <td> lead</td>\n",
" <td> inflow</td>\n",
" <td> ug/L</td>\n",
" <td> 40.00000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 1</td>\n",
" <td> lead</td>\n",
" <td> outflow</td>\n",
" <td> mg/L</td>\n",
" <td> 0.05000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 2</td>\n",
" <td> lead</td>\n",
" <td> outflow</td>\n",
" <td> ug/L</td>\n",
" <td> 60.00000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td> 3</td>\n",
" <td> lead</td>\n",
" <td> outflow</td>\n",
" <td> ug/L</td>\n",
" <td> 70.00000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td> 4</td>\n",
" <td> lead</td>\n",
" <td> outflow</td>\n",
" <td> g/L</td>\n",
" <td> 0.00008</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 10,
"text": [
" storm param station units conc\n",
"0 1 lead inflow ug/L 10.00000\n",
"1 2 lead inflow mg/L 0.02000\n",
"2 3 lead inflow g/L 0.00003\n",
"3 4 lead inflow ug/L 40.00000\n",
"4 1 lead outflow mg/L 0.05000\n",
"5 2 lead outflow ug/L 60.00000\n",
"6 3 lead outflow ug/L 70.00000\n",
"7 4 lead outflow g/L 0.00008"
]
}
],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Compute factors needed to normalize the units\n",
"`get` is a function/method of dictaries that returns the value based on the key. Here we're mapping that function to all of the records in `data['unit']`"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"normalziation = data['units'].map(units_map.get) \n",
"normalziation"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 11,
"text": [
"0 0.000001\n",
"1 0.001000\n",
"2 1.000000\n",
"3 0.000001\n",
"4 0.001000\n",
"5 0.000001\n",
"6 0.000001\n",
"7 1.000000\n",
"Name: units"
]
}
],
"prompt_number": 11
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Normalize and convert to ug/L in one operation and update the `units` column too."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data['conc'] = data['conc'] * normalziation/units_map[target_units]\n",
"data['units'] = target_units\n",
"data"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>storm</th>\n",
" <th>param</th>\n",
" <th>station</th>\n",
" <th>units</th>\n",
" <th>conc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1</td>\n",
" <td> lead</td>\n",
" <td> inflow</td>\n",
" <td> ug/L</td>\n",
" <td> 10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 2</td>\n",
" <td> lead</td>\n",
" <td> inflow</td>\n",
" <td> ug/L</td>\n",
" <td> 20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 3</td>\n",
" <td> lead</td>\n",
" <td> inflow</td>\n",
" <td> ug/L</td>\n",
" <td> 30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 4</td>\n",
" <td> lead</td>\n",
" <td> inflow</td>\n",
" <td> ug/L</td>\n",
" <td> 40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 1</td>\n",
" <td> lead</td>\n",
" <td> outflow</td>\n",
" <td> ug/L</td>\n",
" <td> 50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 2</td>\n",
" <td> lead</td>\n",
" <td> outflow</td>\n",
" <td> ug/L</td>\n",
" <td> 60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td> 3</td>\n",
" <td> lead</td>\n",
" <td> outflow</td>\n",
" <td> ug/L</td>\n",
" <td> 70</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td> 4</td>\n",
" <td> lead</td>\n",
" <td> outflow</td>\n",
" <td> ug/L</td>\n",
" <td> 80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 12,
"text": [
" storm param station units conc\n",
"0 1 lead inflow ug/L 10\n",
"1 2 lead inflow ug/L 20\n",
"2 3 lead inflow ug/L 30\n",
"3 4 lead inflow ug/L 40\n",
"4 1 lead outflow ug/L 50\n",
"5 2 lead outflow ug/L 60\n",
"6 3 lead outflow ug/L 70\n",
"7 4 lead outflow ug/L 80"
]
}
],
"prompt_number": 12
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Now unstack, rename, and organize the table"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# create an index\n",
"data.set_index(['storm', 'param', 'station'], inplace=True)\n",
"\n",
"# unstack/cross tab based on inflow/outflow\n",
"table = data.unstack(level='station')\n",
"\n",
"# rename the column level labels\n",
"table.columns.names = ['Quantity', 'Monitoring Station']\n",
"\n",
"# reorder the column levels\n",
"table.columns = table.columns.swaplevel(0,1)\n",
"\n",
"# order the actual columns\n",
"table = table[[('inflow', 'conc'), ('inflow', 'units'), ('outflow', 'conc'), ('outflow', 'units')]]\n",
"table"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>Monitoring Station</th>\n",
" <th colspan=\"2\" halign=\"left\">inflow</th>\n",
" <th colspan=\"2\" halign=\"left\">outflow</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>Quantity</th>\n",
" <th>conc</th>\n",
" <th>units</th>\n",
" <th>conc</th>\n",
" <th>units</th>\n",
" </tr>\n",
" <tr>\n",
" <th>storm</th>\n",
" <th>param</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>lead</th>\n",
" <td> 10</td>\n",
" <td> ug/L</td>\n",
" <td> 50</td>\n",
" <td> ug/L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>lead</th>\n",
" <td> 20</td>\n",
" <td> ug/L</td>\n",
" <td> 60</td>\n",
" <td> ug/L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>lead</th>\n",
" <td> 30</td>\n",
" <td> ug/L</td>\n",
" <td> 70</td>\n",
" <td> ug/L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>lead</th>\n",
" <td> 40</td>\n",
" <td> ug/L</td>\n",
" <td> 80</td>\n",
" <td> ug/L</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 13,
"text": [
"Monitoring Station inflow inflow outflow outflow\n",
"Quantity conc units conc units\n",
"storm param \n",
"1 lead 10 ug/L 50 ug/L\n",
"2 lead 20 ug/L 60 ug/L\n",
"3 lead 30 ug/L 70 ug/L\n",
"4 lead 40 ug/L 80 ug/L"
]
}
],
"prompt_number": 13
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment