Created
January 28, 2015 15:05
-
-
Save phobson/37f20159e56120261daf to your computer and use it in GitHub Desktop.
precip.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:374fe606e58df33dda5096327ddcbcb3ac02abd904757d23de4272c736d8f837" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"import pandas\n", | |
"\n", | |
"def defineStorms(hydrodata, precipcol=None, inflowcol=None, outflowcol=None,\n", | |
" minprecip=0.01, minflow=0.01, intereventperiods=36,\n", | |
" standardizeColNames=True, outputfreqMinutes=10,\n", | |
" debug=False, stormcol='storm'):\n", | |
" '''\n", | |
" Loops through the hydrologic records and parses the data into storms.\n", | |
" In this context, a storm is defined as starting whenever the\n", | |
" hydrologic records shows non-zero precipitation or [in|out]flow\n", | |
" from the BMP after a minimum inter-event dry period duration\n", | |
" specified in the the function call. A new column (`storm`) is\n", | |
" added to the DataFrame, of copy of which is returned.\n", | |
" Input:\n", | |
" hydrodata : pandas.DataFrame\n", | |
" DataFrame of hydrologic data of the storm. Should contain\n", | |
" a unique index of type pandas.DatetimeIndex.\n", | |
" precipcol : optional string (default = None)\n", | |
" Name of column in `hydrodata` containing precipiation data.\n", | |
" inflowcol : optional string (default = None)\n", | |
" Name of column in `hydrodata` containing influent flow data.\n", | |
" outflowcol : optional string (default = None)\n", | |
" Name of column in `hydrodata` containing effluent flow data.\n", | |
" minprecip : optional float (default = 0.01)\n", | |
" The minimum incremental precipiation depth required to be\n", | |
" considered part of a storm.\n", | |
" minflow : optional float (default = 0.01)\n", | |
" The minimum incremental volumetric flowrate required to be\n", | |
" considered part of a storm.\n", | |
" intereventperiods : optional int (default = 36)\n", | |
" The number of dry records (no flow or rain) required to end\n", | |
" a storm.\n", | |
" standardizeColNames : optional bool (default = True)\n", | |
" Toggles renaming columns to standard names in the returned\n", | |
" DataFrame.\n", | |
" outputfreqMinutes : optional int (default = 10)\n", | |
" The default frequency (minutes) to which all data will be\n", | |
" resampled. Precipitation data will be summed up across '\n", | |
" multiple timesteps during resampling, while flow will be\n", | |
" averaged.\n", | |
" debug : bool (default = False)\n", | |
" If True, diagnostic columns will not be dropped prior to\n", | |
" returning the dataframe of parsed_storms.\n", | |
" Writes:\n", | |
" None\n", | |
" Returns:\n", | |
" parsed_storms : pandas.DataFrame\n", | |
" Copy of the origin `hydrodata` DataFrame, but resmapled to\n", | |
" a fixed frequency, columns possibly renamed, and a `storm`\n", | |
" column added to denote the storm to which each record\n", | |
" belongs. Records where `storm` == 0 are not a part of any\n", | |
" storm.\n", | |
" '''\n", | |
"\n", | |
" # validate input\n", | |
" if precipcol is None and inflowcol is None and outflowcol is None:\n", | |
" msg = '`hydrodata` must have at least a precip or in/outflow column'\n", | |
" raise ValueError(msg)\n", | |
"\n", | |
" # pull out the rain and flow data\n", | |
" if precipcol is None:\n", | |
" precipcol = 'precip'\n", | |
" hydrodata[precipcol] = np.nan\n", | |
"\n", | |
" if inflowcol is None:\n", | |
" inflowcol = 'inflow'\n", | |
" hydrodata[inflowcol] = np.nan\n", | |
"\n", | |
" if outflowcol is None:\n", | |
" outflowcol = 'outflow'\n", | |
" hydrodata[outflowcol] = np.nan\n", | |
"\n", | |
" # bool column where True means there's rain or flow of some kind\n", | |
" hydrodata['wet'] = hydrodata.apply(\n", | |
" lambda r: (r[precipcol] >= minprecip or\n", | |
" r[inflowcol] >= minflow or\n", | |
" r[outflowcol] >= minflow),\n", | |
" axis=1\n", | |
" )\n", | |
"\n", | |
" # copy the bool column into its own df and add a bunch\n", | |
" # shifted columns so each row looks backwards and forwards\n", | |
" hydrodata['windiff'] = pandas.rolling_apply(\n", | |
" hydrodata['wet'],\n", | |
" intereventperiods,\n", | |
" lambda x: x.any(),\n", | |
" min_periods=1\n", | |
" ).diff()\n", | |
"\n", | |
" firstrow = hydrodata.iloc[0]\n", | |
" if firstrow['wet']:\n", | |
" hydrodata.loc[firstrow.name, 'windiff'] = 1\n", | |
"\n", | |
" hydrodata['event_start'] = False\n", | |
" hydrodata['event_end'] = False\n", | |
"\n", | |
" starts = hydrodata['windiff'] == 1\n", | |
" hydrodata.loc[starts, 'event_start'] = True\n", | |
"\n", | |
" stops = hydrodata['windiff'].shift(-1 * intereventperiods) == -1\n", | |
" hydrodata.loc[stops, 'event_end'] = True\n", | |
"\n", | |
" # initialize the new column as zeros\n", | |
" hydrodata[stormcol] = 0\n", | |
"\n", | |
" # each time a storm starts, incriment the storm number + 1\n", | |
" hydrodata[stormcol] = hydrodata['event_start'].cumsum()\n", | |
"\n", | |
" # periods between storms are where the cumulative number\n", | |
" # of storms that have ended are equal to the cumulative\n", | |
" # number of storms that have started.\n", | |
" # Stack Overflow: http://tinyurl.com/lsjkr9x\n", | |
" nostorm = hydrodata[stormcol] == hydrodata['event_end'].shift(2).cumsum()\n", | |
" hydrodata.loc[nostorm, stormcol] = 0\n", | |
"\n", | |
" if standardizeColNames:\n", | |
" coldict = {\n", | |
" precipcol: 'precip',\n", | |
" inflowcol: 'inflow',\n", | |
" outflowcol: 'outflow'\n", | |
" }\n", | |
" hydrodata.rename(columns=coldict, inplace=True)\n", | |
"\n", | |
" #hydrodata['storm'] = iswet['storm']\n", | |
" if not debug:\n", | |
" cols_to_drop = ['wet', 'windiff', 'event_end', 'event_start']\n", | |
" hydrodata.drop(cols_to_drop, axis=1, inplace=True)\n", | |
"\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 32 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df = pandas.read_csv(\"/Users/paul/sources/python-metar/data/KPDX/asos/compile/KPDX.csv\", parse_dates=True, index_col=['Date'])\n", | |
"df.head()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sta</th>\n", | |
" <th>Precip</th>\n", | |
" <th>Temp</th>\n", | |
" <th>DewPnt</th>\n", | |
" <th>WindSpd</th>\n", | |
" <th>WindDir</th>\n", | |
" <th>AtmPress</th>\n", | |
" <th>SkyCover</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:00:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:05:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:10:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:15:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:20:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 36, | |
"text": [ | |
" Sta Precip Temp DewPnt WindSpd WindDir AtmPress \\\n", | |
"Date \n", | |
"2008-10-01 00:00:00 KPDX 0 14 12 0 0 30.01 \n", | |
"2008-10-01 00:05:00 KPDX 0 14 12 0 0 30.01 \n", | |
"2008-10-01 00:10:00 KPDX 0 14 12 0 0 30.01 \n", | |
"2008-10-01 00:15:00 KPDX 0 14 12 0 0 30.01 \n", | |
"2008-10-01 00:20:00 KPDX 0 14 12 0 0 30.01 \n", | |
"\n", | |
" SkyCover \n", | |
"Date \n", | |
"2008-10-01 00:00:00 0 \n", | |
"2008-10-01 00:05:00 0 \n", | |
"2008-10-01 00:10:00 0 \n", | |
"2008-10-01 00:15:00 0 \n", | |
"2008-10-01 00:20:00 0 " | |
] | |
} | |
], | |
"prompt_number": 36 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"freqminutes = 5\n", | |
"minutesperhour = 60\n", | |
"\n", | |
"for ivhours in [3, 6, 9, 12, 18, 24]:\n", | |
" now = datetime.datetime.now()\n", | |
" print(ivhours, now)\n", | |
" ivperiods = ivhours * minutesperhour / freqminutes\n", | |
" defineStorms(df, precipcol='Precip', outputfreqMinutes=freqminutes, standardizeColNames=False,\n", | |
" intereventperiods=ivperiods, stormcol='storm_{:02d}'.format(ivhours))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(3, datetime.datetime(2015, 1, 27, 23, 38, 28, 3938))\n", | |
"(6, datetime.datetime(2015, 1, 27, 23, 38, 55, 84290))" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"(9, datetime.datetime(2015, 1, 27, 23, 39, 21, 216589))" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"(12, datetime.datetime(2015, 1, 27, 23, 39, 47, 849466))" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"(18, datetime.datetime(2015, 1, 27, 23, 40, 14, 167582))" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"(24, datetime.datetime(2015, 1, 27, 23, 40, 40, 766451))" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 37 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print(ivhours, now)\n", | |
"df.head()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(24, datetime.datetime(2015, 1, 27, 23, 40, 40, 766451))\n" | |
] | |
}, | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sta</th>\n", | |
" <th>Precip</th>\n", | |
" <th>Temp</th>\n", | |
" <th>DewPnt</th>\n", | |
" <th>WindSpd</th>\n", | |
" <th>WindDir</th>\n", | |
" <th>AtmPress</th>\n", | |
" <th>SkyCover</th>\n", | |
" <th>inflow</th>\n", | |
" <th>outflow</th>\n", | |
" <th>storm_03</th>\n", | |
" <th>storm_06</th>\n", | |
" <th>storm_09</th>\n", | |
" <th>storm_12</th>\n", | |
" <th>storm_18</th>\n", | |
" <th>storm_24</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:00:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:05:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:10:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:15:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2008-10-01 00:20:00</th>\n", | |
" <td> KPDX</td>\n", | |
" <td> 0</td>\n", | |
" <td> 14</td>\n", | |
" <td> 12</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30.01</td>\n", | |
" <td> 0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 38, | |
"text": [ | |
" Sta Precip Temp DewPnt WindSpd WindDir AtmPress \\\n", | |
"Date \n", | |
"2008-10-01 00:00:00 KPDX 0 14 12 0 0 30.01 \n", | |
"2008-10-01 00:05:00 KPDX 0 14 12 0 0 30.01 \n", | |
"2008-10-01 00:10:00 KPDX 0 14 12 0 0 30.01 \n", | |
"2008-10-01 00:15:00 KPDX 0 14 12 0 0 30.01 \n", | |
"2008-10-01 00:20:00 KPDX 0 14 12 0 0 30.01 \n", | |
"\n", | |
" SkyCover inflow outflow storm_03 storm_06 storm_09 \\\n", | |
"Date \n", | |
"2008-10-01 00:00:00 0 NaN NaN 0 0 0 \n", | |
"2008-10-01 00:05:00 0 NaN NaN 0 0 0 \n", | |
"2008-10-01 00:10:00 0 NaN NaN 0 0 0 \n", | |
"2008-10-01 00:15:00 0 NaN NaN 0 0 0 \n", | |
"2008-10-01 00:20:00 0 NaN NaN 0 0 0 \n", | |
"\n", | |
" storm_12 storm_18 storm_24 \n", | |
"Date \n", | |
"2008-10-01 00:00:00 0 0 0 \n", | |
"2008-10-01 00:05:00 0 0 0 \n", | |
"2008-10-01 00:10:00 0 0 0 \n", | |
"2008-10-01 00:15:00 0 0 0 \n", | |
"2008-10-01 00:20:00 0 0 0 " | |
] | |
} | |
], | |
"prompt_number": 38 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment