Skip to content

Instantly share code, notes, and snippets.

@phobson
Created January 28, 2015 15:05
Show Gist options
  • Save phobson/37f20159e56120261daf to your computer and use it in GitHub Desktop.
Save phobson/37f20159e56120261daf to your computer and use it in GitHub Desktop.
precip.ipynb
{
"metadata": {
"name": "",
"signature": "sha256:374fe606e58df33dda5096327ddcbcb3ac02abd904757d23de4272c736d8f837"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import datetime\n",
"\n",
"import numpy as np\n",
"import pandas\n",
"\n",
"def defineStorms(hydrodata, precipcol=None, inflowcol=None, outflowcol=None,\n",
" minprecip=0.01, minflow=0.01, intereventperiods=36,\n",
" standardizeColNames=True, outputfreqMinutes=10,\n",
" debug=False, stormcol='storm'):\n",
" '''\n",
" Loops through the hydrologic records and parses the data into storms.\n",
" In this context, a storm is defined as starting whenever the\n",
" hydrologic records shows non-zero precipitation or [in|out]flow\n",
" from the BMP after a minimum inter-event dry period duration\n",
" specified in the the function call. A new column (`storm`) is\n",
" added to the DataFrame, of copy of which is returned.\n",
" Input:\n",
" hydrodata : pandas.DataFrame\n",
" DataFrame of hydrologic data of the storm. Should contain\n",
" a unique index of type pandas.DatetimeIndex.\n",
" precipcol : optional string (default = None)\n",
" Name of column in `hydrodata` containing precipiation data.\n",
" inflowcol : optional string (default = None)\n",
" Name of column in `hydrodata` containing influent flow data.\n",
" outflowcol : optional string (default = None)\n",
" Name of column in `hydrodata` containing effluent flow data.\n",
" minprecip : optional float (default = 0.01)\n",
" The minimum incremental precipiation depth required to be\n",
" considered part of a storm.\n",
" minflow : optional float (default = 0.01)\n",
" The minimum incremental volumetric flowrate required to be\n",
" considered part of a storm.\n",
" intereventperiods : optional int (default = 36)\n",
" The number of dry records (no flow or rain) required to end\n",
" a storm.\n",
" standardizeColNames : optional bool (default = True)\n",
" Toggles renaming columns to standard names in the returned\n",
" DataFrame.\n",
" outputfreqMinutes : optional int (default = 10)\n",
" The default frequency (minutes) to which all data will be\n",
" resampled. Precipitation data will be summed up across '\n",
" multiple timesteps during resampling, while flow will be\n",
" averaged.\n",
" debug : bool (default = False)\n",
" If True, diagnostic columns will not be dropped prior to\n",
" returning the dataframe of parsed_storms.\n",
" Writes:\n",
" None\n",
" Returns:\n",
" parsed_storms : pandas.DataFrame\n",
" Copy of the origin `hydrodata` DataFrame, but resmapled to\n",
" a fixed frequency, columns possibly renamed, and a `storm`\n",
" column added to denote the storm to which each record\n",
" belongs. Records where `storm` == 0 are not a part of any\n",
" storm.\n",
" '''\n",
"\n",
" # validate input\n",
" if precipcol is None and inflowcol is None and outflowcol is None:\n",
" msg = '`hydrodata` must have at least a precip or in/outflow column'\n",
" raise ValueError(msg)\n",
"\n",
" # pull out the rain and flow data\n",
" if precipcol is None:\n",
" precipcol = 'precip'\n",
" hydrodata[precipcol] = np.nan\n",
"\n",
" if inflowcol is None:\n",
" inflowcol = 'inflow'\n",
" hydrodata[inflowcol] = np.nan\n",
"\n",
" if outflowcol is None:\n",
" outflowcol = 'outflow'\n",
" hydrodata[outflowcol] = np.nan\n",
"\n",
" # bool column where True means there's rain or flow of some kind\n",
" hydrodata['wet'] = hydrodata.apply(\n",
" lambda r: (r[precipcol] >= minprecip or\n",
" r[inflowcol] >= minflow or\n",
" r[outflowcol] >= minflow),\n",
" axis=1\n",
" )\n",
"\n",
" # copy the bool column into its own df and add a bunch\n",
" # shifted columns so each row looks backwards and forwards\n",
" hydrodata['windiff'] = pandas.rolling_apply(\n",
" hydrodata['wet'],\n",
" intereventperiods,\n",
" lambda x: x.any(),\n",
" min_periods=1\n",
" ).diff()\n",
"\n",
" firstrow = hydrodata.iloc[0]\n",
" if firstrow['wet']:\n",
" hydrodata.loc[firstrow.name, 'windiff'] = 1\n",
"\n",
" hydrodata['event_start'] = False\n",
" hydrodata['event_end'] = False\n",
"\n",
" starts = hydrodata['windiff'] == 1\n",
" hydrodata.loc[starts, 'event_start'] = True\n",
"\n",
" stops = hydrodata['windiff'].shift(-1 * intereventperiods) == -1\n",
" hydrodata.loc[stops, 'event_end'] = True\n",
"\n",
" # initialize the new column as zeros\n",
" hydrodata[stormcol] = 0\n",
"\n",
" # each time a storm starts, incriment the storm number + 1\n",
" hydrodata[stormcol] = hydrodata['event_start'].cumsum()\n",
"\n",
" # periods between storms are where the cumulative number\n",
" # of storms that have ended are equal to the cumulative\n",
" # number of storms that have started.\n",
" # Stack Overflow: http://tinyurl.com/lsjkr9x\n",
" nostorm = hydrodata[stormcol] == hydrodata['event_end'].shift(2).cumsum()\n",
" hydrodata.loc[nostorm, stormcol] = 0\n",
"\n",
" if standardizeColNames:\n",
" coldict = {\n",
" precipcol: 'precip',\n",
" inflowcol: 'inflow',\n",
" outflowcol: 'outflow'\n",
" }\n",
" hydrodata.rename(columns=coldict, inplace=True)\n",
"\n",
" #hydrodata['storm'] = iswet['storm']\n",
" if not debug:\n",
" cols_to_drop = ['wet', 'windiff', 'event_end', 'event_start']\n",
" hydrodata.drop(cols_to_drop, axis=1, inplace=True)\n",
"\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 32
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Load the KPDX ASOS records, indexed by the parsed `Date` column.\n",
"# NOTE(review): this is an absolute path on the author's machine; point it\n",
"# at your own copy of the file before running.\n",
"kpdx_csv = \"/Users/paul/sources/python-metar/data/KPDX/asos/compile/KPDX.csv\"\n",
"df = pandas.read_csv(kpdx_csv, parse_dates=True, index_col=['Date'])\n",
"df.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sta</th>\n",
" <th>Precip</th>\n",
" <th>Temp</th>\n",
" <th>DewPnt</th>\n",
" <th>WindSpd</th>\n",
" <th>WindDir</th>\n",
" <th>AtmPress</th>\n",
" <th>SkyCover</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2008-10-01 00:00:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2008-10-01 00:05:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2008-10-01 00:10:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2008-10-01 00:15:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2008-10-01 00:20:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 36,
"text": [
" Sta Precip Temp DewPnt WindSpd WindDir AtmPress \\\n",
"Date \n",
"2008-10-01 00:00:00 KPDX 0 14 12 0 0 30.01 \n",
"2008-10-01 00:05:00 KPDX 0 14 12 0 0 30.01 \n",
"2008-10-01 00:10:00 KPDX 0 14 12 0 0 30.01 \n",
"2008-10-01 00:15:00 KPDX 0 14 12 0 0 30.01 \n",
"2008-10-01 00:20:00 KPDX 0 14 12 0 0 30.01 \n",
"\n",
" SkyCover \n",
"Date \n",
"2008-10-01 00:00:00 0 \n",
"2008-10-01 00:05:00 0 \n",
"2008-10-01 00:10:00 0 \n",
"2008-10-01 00:15:00 0 \n",
"2008-10-01 00:20:00 0 "
]
}
],
"prompt_number": 36
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"freqminutes = 5\n",
"minutesperhour = 60\n",
"\n",
"# Label storms for a range of inter-event dry periods (in hours). Each\n",
"# call adds a `storm_HH` column to `df` in place.\n",
"for ivhours in [3, 6, 9, 12, 18, 24]:\n",
"    # the timestamp is printed only as a progress/timing indicator\n",
"    now = datetime.datetime.now()\n",
"    print(ivhours, now)\n",
"    # floor division keeps the record count an integer on Python 3\n",
"    ivperiods = ivhours * minutesperhour // freqminutes\n",
"    defineStorms(df, precipcol='Precip', outputfreqMinutes=freqminutes, standardizeColNames=False,\n",
"                 intereventperiods=ivperiods, stormcol='storm_{:02d}'.format(ivhours))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(3, datetime.datetime(2015, 1, 27, 23, 38, 28, 3938))\n",
"(6, datetime.datetime(2015, 1, 27, 23, 38, 55, 84290))"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"(9, datetime.datetime(2015, 1, 27, 23, 39, 21, 216589))"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"(12, datetime.datetime(2015, 1, 27, 23, 39, 47, 849466))"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"(18, datetime.datetime(2015, 1, 27, 23, 40, 14, 167582))"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"(24, datetime.datetime(2015, 1, 27, 23, 40, 40, 766451))"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 37
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(ivhours, now)\n",
"df.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(24, datetime.datetime(2015, 1, 27, 23, 40, 40, 766451))\n"
]
},
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sta</th>\n",
" <th>Precip</th>\n",
" <th>Temp</th>\n",
" <th>DewPnt</th>\n",
" <th>WindSpd</th>\n",
" <th>WindDir</th>\n",
" <th>AtmPress</th>\n",
" <th>SkyCover</th>\n",
" <th>inflow</th>\n",
" <th>outflow</th>\n",
" <th>storm_03</th>\n",
" <th>storm_06</th>\n",
" <th>storm_09</th>\n",
" <th>storm_12</th>\n",
" <th>storm_18</th>\n",
" <th>storm_24</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2008-10-01 00:00:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2008-10-01 00:05:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2008-10-01 00:10:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2008-10-01 00:15:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2008-10-01 00:20:00</th>\n",
" <td> KPDX</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30.01</td>\n",
" <td> 0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 38,
"text": [
" Sta Precip Temp DewPnt WindSpd WindDir AtmPress \\\n",
"Date \n",
"2008-10-01 00:00:00 KPDX 0 14 12 0 0 30.01 \n",
"2008-10-01 00:05:00 KPDX 0 14 12 0 0 30.01 \n",
"2008-10-01 00:10:00 KPDX 0 14 12 0 0 30.01 \n",
"2008-10-01 00:15:00 KPDX 0 14 12 0 0 30.01 \n",
"2008-10-01 00:20:00 KPDX 0 14 12 0 0 30.01 \n",
"\n",
" SkyCover inflow outflow storm_03 storm_06 storm_09 \\\n",
"Date \n",
"2008-10-01 00:00:00 0 NaN NaN 0 0 0 \n",
"2008-10-01 00:05:00 0 NaN NaN 0 0 0 \n",
"2008-10-01 00:10:00 0 NaN NaN 0 0 0 \n",
"2008-10-01 00:15:00 0 NaN NaN 0 0 0 \n",
"2008-10-01 00:20:00 0 NaN NaN 0 0 0 \n",
"\n",
" storm_12 storm_18 storm_24 \n",
"Date \n",
"2008-10-01 00:00:00 0 0 0 \n",
"2008-10-01 00:05:00 0 0 0 \n",
"2008-10-01 00:10:00 0 0 0 \n",
"2008-10-01 00:15:00 0 0 0 \n",
"2008-10-01 00:20:00 0 0 0 "
]
}
],
"prompt_number": 38
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment