rutj3 · June 27, 2013 13:47
diff --git a/gistfile1.txt b/gistfile1.txt
 {
 "metadata": {
  "name": "KNMI_labrijn"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# numpy is referenced as `np` by later cells; import it here so the notebook\n",
      "# does not depend on `np` already being present in the kernel namespace.\n",
      "import numpy as np\n",
      "import pandas as pd\n",
      "\n",
      "# display width in characters ('line_width' was the older alias of this option)\n",
      "pd.set_option('display.width', 180)\n",
      "%cd 'D:\\\\01_Algemeen\\\\GIS_datasets\\KNMI\\\\'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "D:\\01_Algemeen\\GIS_datasets\\KNMI\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Load 'Labrijn.csv': skip the first 5 lines, split on whitespace and\n",
      "# use the 'Year' column as the index.\n",
      "df = pd.read_csv('Labrijn.csv', skiprows=5, delim_whitespace=True, index_col='Year')\n",
      "\n",
      "# Relabel the remaining columns 1..12 and call that axis 'Month'.\n",
      "df.columns = np.arange(1, 13)\n",
      "df.columns.name = 'Month'\n",
      "\n",
      "# Stack to long form (one row per Year/Month pair), then move 'Month' out of\n",
      "# the index into an ordinary column; the stacked values become 'TG'.\n",
      "df = df.stack('Month').reset_index(level=1)\n",
      "df.columns = ['Month', 'TG']\n",
      "\n",
      "df.head()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>Month</th>\n",
        "      <th>TG</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>Year</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>1706</th>\n",
        "      <td> 1</td>\n",
        "      <td>  0.5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1706</th>\n",
        "      <td> 2</td>\n",
        "      <td>  3.2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1706</th>\n",
        "      <td> 3</td>\n",
        "      <td>  5.5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1706</th>\n",
        "      <td> 4</td>\n",
        "      <td>  8.7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1706</th>\n",
        "      <td> 5</td>\n",
        "      <td> 14.6</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "      Month    TG\n",
        "Year             \n",
        "1706      1   0.5\n",
        "1706      2   3.2\n",
        "1706      3   5.5\n",
        "1706      4   8.7\n",
        "1706      5  14.6"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Anomaly with respect to a decade-specific climatology.\n",
      "# Base period used for every year of a decade:\n",
      "#   start: year - (year % 10) - 29\n",
      "#   end:   year - (year % 10)\n",
      "def normalize(group, base_years=30):\n",
      "    \"\"\"Return a copy of `group` with an added 'anom' column.\n",
      "\n",
      "    'anom' is TG minus the mean TG of the same month over the base period.\n",
      "\n",
      "    group      -- one chunk handed over by groupby().apply(); group.name is\n",
      "                  its key, here the first year of the decade.\n",
      "    base_years -- number of years in the base period, which ends at (and\n",
      "                  includes) group.name; 30 gives the window described above.\n",
      "\n",
      "    The base-period values are read from the notebook-level `df`.\n",
      "    \"\"\"\n",
      "    end = int(group.name)\n",
      "    begin = end - (base_years - 1)\n",
      "\n",
      "    # mean TG per month over the years begin..end (both included)\n",
      "    in_base = (df.index >= begin) & (df.index <= end)\n",
      "    dfnorm = df[in_base].groupby('Month')['TG'].mean()\n",
      "\n",
      "    # Work on a copy instead of writing into the chunk that apply() passed in.\n",
      "    result = group.copy()\n",
      "    # look up each row's monthly mean by its Month value\n",
      "    result['anom'] = result['TG'] - result['Month'].map(dfnorm)\n",
      "\n",
      "    return result\n",
      "\n",
      "# group_keys=False keeps the original Year index on the combined result\n",
      "# (newer pandas versions would otherwise prepend the decade key to the index).\n",
      "dfnew = df.groupby(lambda x: x - np.mod(x, 10), group_keys=False).apply(normalize)\n",
      "dfnew = dfnew[(dfnew.index >= 1901) & (dfnew.index < 2013)]\n",
      "\n",
      "dfnew.head()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>Month</th>\n",
        "      <th>TG</th>\n",
        "      <th>anom</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>Year</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>1901</th>\n",
        "      <td> 1</td>\n",
        "      <td> -0.3</td>\n",
        "      <td>-1.766667</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1901</th>\n",
        "      <td> 2</td>\n",
        "      <td> -0.9</td>\n",
        "      <td>-3.516667</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1901</th>\n",
        "      <td> 3</td>\n",
        "      <td>  3.5</td>\n",
        "      <td>-1.136667</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1901</th>\n",
        "      <td> 4</td>\n",
        "      <td>  9.1</td>\n",
        "      <td> 0.973333</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1901</th>\n",
        "      <td> 5</td>\n",
        "      <td> 12.2</td>\n",
        "      <td> 0.483333</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "output_type": "pyout",
       "prompt_number": 4,
       "text": [
        "      Month    TG      anom\n",
        "Year                       \n",
        "1901      1  -0.3 -1.766667\n",
        "1901      2  -0.9 -3.516667\n",
        "1901      3   3.5 -1.136667\n",
        "1901      4   9.1  0.973333\n",
        "1901      5  12.2  0.483333"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# flag the rows whose anomaly is below -0.5\n",
      "dfnew['Cold'] = dfnew['anom'] < -0.5\n",
      "# a row opens a new block when its flag differs from the previous row's flag;\n",
      "# the running total of those changes numbers each run of equal flags\n",
      "dfnew['block'] = (dfnew['Cold'] != dfnew['Cold'].shift()).astype(int).cumsum()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def summarize(group):\n",
      "    \"\"\"Condense the rows of one block into a single summary row.\n",
      "\n",
      "    group -- rows sharing one 'block' value, indexed by year and carrying the\n",
      "             'Month', 'TG', 'anom' and 'block' columns (assumed to be in\n",
      "             chronological order, so first/last row are start/end).\n",
      "\n",
      "    Returns a Series with, in this order: 'start' and 'end' (datetime of the\n",
      "    first day of the first/last month), 'count' (non-null 'block' entries),\n",
      "    'TG_mean' (mean TG) and 'anom' (mean anomaly).\n",
      "    \"\"\"\n",
      "    # pd.datetime was only an alias of the standard-library class and has been\n",
      "    # removed from pandas, so the stdlib class is used directly.\n",
      "    from datetime import datetime\n",
      "\n",
      "    count = group['block'].count()\n",
      "    start = datetime(int(group.index[0]), int(group.Month.iloc[0]), 1)\n",
      "    end = datetime(int(group.index[-1]), int(group.Month.iloc[-1]), 1)\n",
      "    TG_mean = group.TG.mean()\n",
      "    anom_mean = group.anom.mean()\n",
      "\n",
      "    # `index` fixes the order of the entries\n",
      "    return pd.Series({'count': count, 'start': start, 'end': end, 'TG_mean': TG_mean, 'anom': anom_mean},\n",
      "                     index=['start', 'end', 'count', 'TG_mean', 'anom'])\n",
      "\n",
      "# group by the unique blocks and summarize the properties of each group\n",
      "dfnew_sum = dfnew.groupby('block').apply(summarize)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# blocks with at least 6 rows and a negative mean anomaly\n",
      "# (print with parentheses and a single argument works on Python 2 and 3 alike)\n",
      "print(dfnew_sum[(dfnew_sum['count'] >= 6) & (dfnew_sum['anom'] < 0)])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "                     start                  end count   TG_mean      anom\n",
        "block                                                                    \n",
        "9      1902-07-01 00:00:00  1902-12-01 00:00:00     6  9.416667 -1.350556\n",
        "107    1923-11-01 00:00:00  1924-04-01 00:00:00     6  2.283333 -1.966667\n",
        "261    1962-03-01 00:00:00  1962-09-01 00:00:00     7  10.94286 -1.851429\n",
        "279    1965-03-01 00:00:00  1965-09-01 00:00:00     7  11.52857 -1.265714\n"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
 }
	{
	"metadata": {
	"name": "KNMI_labrijn"
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# numpy is referenced as `np` by later cells; import it here so the notebook\n",
	"# does not depend on `np` already being present in the kernel namespace.\n",
	"import numpy as np\n",
	"import pandas as pd\n",
	"\n",
	"# display width in characters ('line_width' was the older alias of this option)\n",
	"pd.set_option('display.width', 180)\n",
	"%cd 'D:\\\\01_Algemeen\\\\GIS_datasets\\KNMI\\\\'"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"D:\\01_Algemeen\\GIS_datasets\\KNMI\n"
	]
	}
	],
	"prompt_number": 1
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# Load 'Labrijn.csv': skip the first 5 lines, split on whitespace and\n",
	"# use the 'Year' column as the index.\n",
	"df = pd.read_csv('Labrijn.csv', skiprows=5, delim_whitespace=True, index_col='Year')\n",
	"\n",
	"# Relabel the remaining columns 1..12 and call that axis 'Month'.\n",
	"df.columns = np.arange(1, 13)\n",
	"df.columns.name = 'Month'\n",
	"\n",
	"# Stack to long form (one row per Year/Month pair), then move 'Month' out of\n",
	"# the index into an ordinary column; the stacked values become 'TG'.\n",
	"df = df.stack('Month').reset_index(level=1)\n",
	"df.columns = ['Month', 'TG']\n",
	"\n",
	"df.head()"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"html": [
	"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Month</th>\n",
	" <th>TG</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>Year</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>1706</th>\n",
	" <td> 1</td>\n",
	" <td> 0.5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1706</th>\n",
	" <td> 2</td>\n",
	" <td> 3.2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1706</th>\n",
	" <td> 3</td>\n",
	" <td> 5.5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1706</th>\n",
	" <td> 4</td>\n",
	" <td> 8.7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1706</th>\n",
	" <td> 5</td>\n",
	" <td> 14.6</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"output_type": "pyout",
	"prompt_number": 2,
	"text": [
	" Month TG\n",
	"Year \n",
	"1706 1 0.5\n",
	"1706 2 3.2\n",
	"1706 3 5.5\n",
	"1706 4 8.7\n",
	"1706 5 14.6"
	]
	}
	],
	"prompt_number": 2
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# Anomaly with respect to a decade-specific climatology.\n",
	"# Base period used for every year of a decade:\n",
	"#   start: year - (year % 10) - 29\n",
	"#   end:   year - (year % 10)\n",
	"def normalize(group, base_years=30):\n",
	"    \"\"\"Return a copy of `group` with an added 'anom' column.\n",
	"\n",
	"    'anom' is TG minus the mean TG of the same month over the base period.\n",
	"\n",
	"    group      -- one chunk handed over by groupby().apply(); group.name is\n",
	"                  its key, here the first year of the decade.\n",
	"    base_years -- number of years in the base period, which ends at (and\n",
	"                  includes) group.name; 30 gives the window described above.\n",
	"\n",
	"    The base-period values are read from the notebook-level `df`.\n",
	"    \"\"\"\n",
	"    end = int(group.name)\n",
	"    begin = end - (base_years - 1)\n",
	"\n",
	"    # mean TG per month over the years begin..end (both included)\n",
	"    in_base = (df.index >= begin) & (df.index <= end)\n",
	"    dfnorm = df[in_base].groupby('Month')['TG'].mean()\n",
	"\n",
	"    # Work on a copy instead of writing into the chunk that apply() passed in.\n",
	"    result = group.copy()\n",
	"    # look up each row's monthly mean by its Month value\n",
	"    result['anom'] = result['TG'] - result['Month'].map(dfnorm)\n",
	"\n",
	"    return result\n",
	"\n",
	"# group_keys=False keeps the original Year index on the combined result\n",
	"# (newer pandas versions would otherwise prepend the decade key to the index).\n",
	"dfnew = df.groupby(lambda x: x - np.mod(x, 10), group_keys=False).apply(normalize)\n",
	"dfnew = dfnew[(dfnew.index >= 1901) & (dfnew.index < 2013)]\n",
	"\n",
	"dfnew.head()"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"html": [
	"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Month</th>\n",
	" <th>TG</th>\n",
	" <th>anom</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>Year</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>1901</th>\n",
	" <td> 1</td>\n",
	" <td> -0.3</td>\n",
	" <td>-1.766667</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1901</th>\n",
	" <td> 2</td>\n",
	" <td> -0.9</td>\n",
	" <td>-3.516667</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1901</th>\n",
	" <td> 3</td>\n",
	" <td> 3.5</td>\n",
	" <td>-1.136667</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1901</th>\n",
	" <td> 4</td>\n",
	" <td> 9.1</td>\n",
	" <td> 0.973333</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1901</th>\n",
	" <td> 5</td>\n",
	" <td> 12.2</td>\n",
	" <td> 0.483333</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"output_type": "pyout",
	"prompt_number": 4,
	"text": [
	" Month TG anom\n",
	"Year \n",
	"1901 1 -0.3 -1.766667\n",
	"1901 2 -0.9 -3.516667\n",
	"1901 3 3.5 -1.136667\n",
	"1901 4 9.1 0.973333\n",
	"1901 5 12.2 0.483333"
	]
	}
	],
	"prompt_number": 4
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# flag the rows whose anomaly is below -0.5\n",
	"dfnew['Cold'] = dfnew['anom'] < -0.5\n",
	"# a row opens a new block when its flag differs from the previous row's flag;\n",
	"# the running total of those changes numbers each run of equal flags\n",
	"dfnew['block'] = (dfnew['Cold'] != dfnew['Cold'].shift()).astype(int).cumsum()"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 5
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"def summarize(group):\n",
	"    \"\"\"Condense the rows of one block into a single summary row.\n",
	"\n",
	"    group -- rows sharing one 'block' value, indexed by year and carrying the\n",
	"             'Month', 'TG', 'anom' and 'block' columns (assumed to be in\n",
	"             chronological order, so first/last row are start/end).\n",
	"\n",
	"    Returns a Series with, in this order: 'start' and 'end' (datetime of the\n",
	"    first day of the first/last month), 'count' (non-null 'block' entries),\n",
	"    'TG_mean' (mean TG) and 'anom' (mean anomaly).\n",
	"    \"\"\"\n",
	"    # pd.datetime was only an alias of the standard-library class and has been\n",
	"    # removed from pandas, so the stdlib class is used directly.\n",
	"    from datetime import datetime\n",
	"\n",
	"    count = group['block'].count()\n",
	"    start = datetime(int(group.index[0]), int(group.Month.iloc[0]), 1)\n",
	"    end = datetime(int(group.index[-1]), int(group.Month.iloc[-1]), 1)\n",
	"    TG_mean = group.TG.mean()\n",
	"    anom_mean = group.anom.mean()\n",
	"\n",
	"    # `index` fixes the order of the entries\n",
	"    return pd.Series({'count': count, 'start': start, 'end': end, 'TG_mean': TG_mean, 'anom': anom_mean},\n",
	"                     index=['start', 'end', 'count', 'TG_mean', 'anom'])\n",
	"\n",
	"# group by the unique blocks and summarize the properties of each group\n",
	"dfnew_sum = dfnew.groupby('block').apply(summarize)"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 6
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# blocks with at least 6 rows and a negative mean anomaly\n",
	"# (print with parentheses and a single argument works on Python 2 and 3 alike)\n",
	"print(dfnew_sum[(dfnew_sum['count'] >= 6) & (dfnew_sum['anom'] < 0)])"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	" start end count TG_mean anom\n",
	"block \n",
	"9 1902-07-01 00:00:00 1902-12-01 00:00:00 6 9.416667 -1.350556\n",
	"107 1923-11-01 00:00:00 1924-04-01 00:00:00 6 2.283333 -1.966667\n",
	"261 1962-03-01 00:00:00 1962-09-01 00:00:00 7 10.94286 -1.851429\n",
	"279 1965-03-01 00:00:00 1965-09-01 00:00:00 7 11.52857 -1.265714\n"
	]
	}
	],
	"prompt_number": 7
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [],
	"language": "python",
	"metadata": {},
	"outputs": []
	}
	],
	"metadata": {}
	}
	]
	}