Skip to content

Instantly share code, notes, and snippets.

@rutj3
Created June 27, 2013 13:47
Show Gist options
  • Save rutj3/5876538 to your computer and use it in GitHub Desktop.
Save rutj3/5876538 to your computer and use it in GitHub Desktop.
KNMI_labrijn
{
"metadata": {
"name": "KNMI_labrijn"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"pd.set_option('line_width', 180)\n",
"%cd 'D:\\\\01_Algemeen\\\\GIS_datasets\\KNMI\\\\'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"D:\\01_Algemeen\\GIS_datasets\\KNMI\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# read data\n",
"df = pd.read_csv('Labrijn.csv', skiprows=5, delim_whitespace=True, index_col='Year')\n",
"\n",
"df.columns = np.arange(1,13,1)\n",
"df.columns.names = ['Month']\n",
"\n",
"df = pd.DataFrame(df.stack('Month')).reset_index(level=1)\n",
"df.columns = ['Month','TG']\n",
"\n",
"df.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Month</th>\n",
" <th>TG</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Year</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1706</th>\n",
" <td> 1</td>\n",
" <td> 0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1706</th>\n",
" <td> 2</td>\n",
" <td> 3.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1706</th>\n",
" <td> 3</td>\n",
" <td> 5.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1706</th>\n",
" <td> 4</td>\n",
" <td> 8.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1706</th>\n",
" <td> 5</td>\n",
" <td> 14.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 2,
"text": [
" Month TG\n",
"Year \n",
"1706 1 0.5\n",
"1706 2 3.2\n",
"1706 3 5.5\n",
"1706 4 8.7\n",
"1706 5 14.6"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# create anomaly by using the local climatology\n",
"# base period: \n",
"# start: year - (year % 10) - 29\n",
"# end year - (year % 10)\n",
"def normalize(group):\n",
" \n",
" end = int(group.name)\n",
" begin = end - 29\n",
" \n",
" dfnorm = df[(df.index >= begin) & (df.index <= end)].groupby('Month')['TG'].mean()\n",
" group['anom'] = group['TG'] - group.join(dfnorm, on='Month', rsuffix='_norm')['TG_norm']\n",
"\n",
" return group\n",
"\n",
"dfnew = df.groupby(lambda x: x - np.mod(x, 10)).apply(normalize)\n",
"dfnew = dfnew[(dfnew.index >= 1901) & (dfnew.index < 2013)]\n",
"\n",
"dfnew.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Month</th>\n",
" <th>TG</th>\n",
" <th>anom</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Year</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1901</th>\n",
" <td> 1</td>\n",
" <td> -0.3</td>\n",
" <td>-1.766667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1901</th>\n",
" <td> 2</td>\n",
" <td> -0.9</td>\n",
" <td>-3.516667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1901</th>\n",
" <td> 3</td>\n",
" <td> 3.5</td>\n",
" <td>-1.136667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1901</th>\n",
" <td> 4</td>\n",
" <td> 9.1</td>\n",
" <td> 0.973333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1901</th>\n",
" <td> 5</td>\n",
" <td> 12.2</td>\n",
" <td> 0.483333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 4,
"text": [
" Month TG anom\n",
"Year \n",
"1901 1 -0.3 -1.766667\n",
"1901 2 -0.9 -3.516667\n",
"1901 3 3.5 -1.136667\n",
"1901 4 9.1 0.973333\n",
"1901 5 12.2 0.483333"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dfnew['Cold'] = (dfnew['anom'] < -0.5)\n",
"dfnew['block'] = (dfnew['Cold'].shift(1) != dfnew['Cold']).astype(int).cumsum()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def summarize(group):\n",
" \n",
" count = group['block'].count()\n",
" start = pd.datetime.strptime('%04i-%02i' % (group.index[0], group.Month.iloc[0]), '%Y-%m')\n",
" end = pd.datetime.strptime('%04i-%02i' % (group.index[-1], group.Month.iloc[-1]), '%Y-%m')\n",
" TG_mean = group.TG.mean()\n",
" anom_mean = group.anom.mean()\n",
" \n",
" # columns selection is just for reordering the columns\n",
" return pd.Series(data={'count': count, 'start': start, 'end': end, 'TG_mean': TG_mean, 'anom': anom_mean})[['start', 'end', 'count', 'TG_mean', 'anom']]\n",
"\n",
"# groupby the unique blocks and summerize the properties of each group\n",
"dfnew_sum = dfnew.groupby('block').apply(summarize)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print dfnew_sum[(dfnew_sum['count'] >= 6) & (dfnew_sum['anom'] < 0)]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" start end count TG_mean anom\n",
"block \n",
"9 1902-07-01 00:00:00 1902-12-01 00:00:00 6 9.416667 -1.350556\n",
"107 1923-11-01 00:00:00 1924-04-01 00:00:00 6 2.283333 -1.966667\n",
"261 1962-03-01 00:00:00 1962-09-01 00:00:00 7 10.94286 -1.851429\n",
"279 1965-03-01 00:00:00 1965-09-01 00:00:00 7 11.52857 -1.265714\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment