@flamingbear
Last active August 29, 2015 14:20
Simpler sea ice statistics, writing to an excel spreadsheet.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Compute summary statistics for the daily sea ice index.\n",
"\n",
"# From the CSV files determine the day of maximum and minimum extent for each month and how that month's max and min ranks with all other months\n",
"\n",
"The input data format is just a date and extent for each day we have data.\n",
"```\n",
"Year, Month, Day, Extent, Missing, Source Data\n",
"YYYY, MM, DD, 10^6 sq km, 10^6 sq km, Source data product web site: http://nsidc.org/d....\n",
"1978, 10, 26, 10.231, 0.000, ftp://sidads.colorado.edu/pub/DATASETS/nsidc0051....\n",
"1978, 10, 28, 10.420, 0.000, ftp://sidads.colorado.edu/pub/DATASETS/nsidc0051....\n",
"1978, 10, 30, 10.557, 0.000, ftp://sidads.colorado.edu/pub/DATASETS/nsidc0051....\n",
"....\n",
"```\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"Start by downloading the daily sea ice extent data from NSIDC's website."
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!wget -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/north/daily/data/NH_seaice_extent_final.csv\n",
"!wget -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/north/daily/data/NH_seaice_extent_nrt.csv\n",
"!wget -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/south/daily/data/SH_seaice_extent_final.csv\n",
"!wget -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/south/daily/data/SH_seaice_extent_nrt.csv\n"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"import datetime as dt\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import os\n",
"%pylab inline\n",
"import pandas as pd\n",
"from pandas import ExcelWriter\n",
"pd.options.display.mpl_style = 'default'\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"code to read the CSV files."
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\n",
"def parse_the_date(year, mm, dd):\n",
" return dt.date(int(year), int(mm), int(dd))\n",
"\n",
"def slurp_csv(filename):\n",
" data = pd.read_csv(filename, header = None, skiprows=2,\n",
" names=[\"year\", \"mm\", \"dd\", \"extent\", \"missing\", \"source\"],\n",
" parse_dates={'date':['year', 'mm', 'dd']},\n",
" date_parser=parse_the_date, index_col='date')\n",
" data = data.drop('missing', axis=1)\n",
" return data\n",
"\n",
"def read_a_hemisphere(hemisphere):\n",
" final_prod_filename = os.path.join('{hemi}H_seaice_extent_final.csv'.format(hemi=hemisphere[0:1].upper()))\n",
" nrt_prod_filename = os.path.join('{hemi}H_seaice_extent_nrt.csv'.format(hemi=hemisphere[0:1].upper()))\n",
"\n",
" final = slurp_csv(final_prod_filename)\n",
" nrt = slurp_csv(nrt_prod_filename)\n",
" all_data = pd.concat([final, nrt])\n",
" return all_data\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
" Read CSV data"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>extent</th>\n",
" <th>source</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1978-10-26</th>\n",
" <td>17.634</td>\n",
" <td>ftp://sidads.colorado.edu/pub/DATASETS/nsidc0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-10-28</th>\n",
" <td>17.815</td>\n",
" <td>ftp://sidads.colorado.edu/pub/DATASETS/nsidc0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-10-30</th>\n",
" <td>17.671</td>\n",
" <td>ftp://sidads.colorado.edu/pub/DATASETS/nsidc0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-11-01</th>\n",
" <td>17.534</td>\n",
" <td>ftp://sidads.colorado.edu/pub/DATASETS/nsidc0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-11-03</th>\n",
" <td>17.493</td>\n",
" <td>ftp://sidads.colorado.edu/pub/DATASETS/nsidc0...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" extent source\n",
"date \n",
"1978-10-26 17.634 ftp://sidads.colorado.edu/pub/DATASETS/nsidc0...\n",
"1978-10-28 17.815 ftp://sidads.colorado.edu/pub/DATASETS/nsidc0...\n",
"1978-10-30 17.671 ftp://sidads.colorado.edu/pub/DATASETS/nsidc0...\n",
"1978-11-01 17.534 ftp://sidads.colorado.edu/pub/DATASETS/nsidc0...\n",
"1978-11-03 17.493 ftp://sidads.colorado.edu/pub/DATASETS/nsidc0..."
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"north = read_a_hemisphere('north')\n",
"south = read_a_hemisphere('south')\n",
"south.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"Add columns for year and month: We have do this because when we read the CSV file\n",
"we converted the existing year/month/day columns into a python datetime object.\n",
"also drop the source because we don't care where the data came from (near real time or production)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def add_year_month_columns(df):\n",
" a = df.copy()\n",
" a = a.drop('source',1)\n",
" a = a.reset_index()\n",
" a['year'] = pd.to_datetime(a.date).dt.year\n",
" a['month'] = pd.to_datetime(a.date).dt.month\n",
" a = a.set_index('date')\n",
" return a"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"north = add_year_month_columns(north)\n",
"south = add_year_month_columns(south)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>extent</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1978-10-26</th>\n",
" <td>10.231</td>\n",
" <td>1978</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-10-28</th>\n",
" <td>10.420</td>\n",
" <td>1978</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-10-30</th>\n",
" <td>10.557</td>\n",
" <td>1978</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-11-01</th>\n",
" <td>10.670</td>\n",
" <td>1978</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-11-03</th>\n",
" <td>10.787</td>\n",
" <td>1978</td>\n",
" <td>11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" extent year month\n",
"date \n",
"1978-10-26 10.231 1978 10\n",
"1978-10-28 10.420 1978 10\n",
"1978-10-30 10.557 1978 10\n",
"1978-11-01 10.670 1978 11\n",
"1978-11-03 10.787 1978 11"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"north.head()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>extent</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1978-10-26</th>\n",
" <td>17.634</td>\n",
" <td>1978</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-10-28</th>\n",
" <td>17.815</td>\n",
" <td>1978</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-10-30</th>\n",
" <td>17.671</td>\n",
" <td>1978</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-11-01</th>\n",
" <td>17.534</td>\n",
" <td>1978</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978-11-03</th>\n",
" <td>17.493</td>\n",
" <td>1978</td>\n",
" <td>11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" extent year month\n",
"date \n",
"1978-10-26 17.634 1978 10\n",
"1978-10-28 17.815 1978 10\n",
"1978-10-30 17.671 1978 10\n",
"1978-11-01 17.534 1978 11\n",
"1978-11-03 17.493 1978 11"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"south.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"Add 5 day rolling mean to the timesereis."
]
},
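{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sketch of why the data are aligned onto a full daily calendar first (toy values, using the modern `Series.rolling` spelling rather than `pd.rolling_mean`): early extents arrive only every other day, so reindexing onto a daily range makes the 5-row window a true 5-calendar-day window.\n",
"\n",
"```python\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# every-other-day samples, as in the early record\n",
"idx = pd.to_datetime(['1978-10-26', '1978-10-28', '1978-10-30'])\n",
"extent = pd.Series([10.231, 10.420, 10.557], index=idx)\n",
"\n",
"# reindex onto a daily calendar, then average over 5 calendar days;\n",
"# min_periods=2 leaves a NaN where fewer than two observations fall in the window\n",
"daily = extent.reindex(pd.date_range('1978-10-26', '1978-10-30'))\n",
"avg = daily.rolling(window=5, min_periods=2).mean()\n",
"```\n"
]
},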
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def add_rolling_mean(df, window=5, min_periods=2):\n",
" copy = df.copy()\n",
" # create an empty ts to align our extent data with\n",
" ts = pd.Series(NaN, index=pd.date_range('1978-10-25', dt.date.today().strftime('%Y-%m-%d')))\n",
" copy.index = copy.index.to_datetime()\n",
" copy = df.align(ts, axis=0, join='right')[0]\n",
" df['5day-Avg'] = pd.rolling_mean(copy['extent'], window=5, min_periods=2)\n",
" return df"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"Want date back in the columns"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"north = add_rolling_mean(north)\n",
"south = add_rolling_mean(south)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>extent</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>5day-Avg</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1978-10-26</th>\n",
" <td>10.231</td>\n",
" <td>1978</td>\n",
" <td>10</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" extent year month 5day-Avg\n",
"date \n",
"1978-10-26 10.231 1978 10 NaN"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"north.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>extent</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" <th>5day-Avg</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1978-10-26</td>\n",
" <td>10.231</td>\n",
" <td>1978</td>\n",
" <td>10</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date extent year month 5day-Avg\n",
"0 1978-10-26 10.231 1978 10 NaN"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"north = north.reset_index()\n",
"south = south.reset_index()\n",
"north.head(1)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"Use a groupby to compute the row locations that represent the minimum and\n",
"maximum extent and grab those rows into new variables. AKA: Filter out everything\n",
"but the minimum/maximum extent for each month and year"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def select_min_and_max_variable_rows_by_year_and_month(df, variable):\n",
" min_groups = df.loc[df.groupby(['year','month'])[variable].idxmin()][['date', variable, 'year', 'month']]\n",
" max_groups = df.loc[df.groupby(['year','month'])[variable].idxmax()][['date', variable, 'year', 'month']]\n",
" return {'min': min_groups, 'max': max_groups}"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"create dictionaries of max and min values for each hemisphere and for daily and rolling-mean"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n = select_min_and_max_variable_rows_by_year_and_month(north, 'extent')\n",
"navg = select_min_and_max_variable_rows_by_year_and_month(north, '5day-Avg')\n",
"s = select_min_and_max_variable_rows_by_year_and_month(south, 'extent')\n",
"savg = select_min_and_max_variable_rows_by_year_and_month(south, '5day-Avg')\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"show that we have actually selected different data for daily and 5-average data"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>extent</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>1979-01-30</td>\n",
" <td>15.912</td>\n",
" <td>1979</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>1979-02-25</td>\n",
" <td>16.579</td>\n",
" <td>1979</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date extent year month\n",
"48 1979-01-30 15.912 1979 1\n",
"61 1979-02-25 16.579 1979 2"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n['max'][3:5]"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>5day-Avg</th>\n",
" <th>year</th>\n",
" <th>month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>1979-01-30</td>\n",
" <td>15.795333</td>\n",
" <td>1979</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>1979-02-27</td>\n",
" <td>16.515000</td>\n",
" <td>1979</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date 5day-Avg year month\n",
"48 1979-01-30 15.795333 1979 1\n",
"62 1979-02-27 16.515000 1979 2"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"navg['max'][3:5]"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def add_rank(df, rank_by):\n",
" df['rank'] = df.groupby('month')[rank_by].rank()\n",
" return df"
]
},
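{
"cell_type": "markdown",
"metadata": {},
"source": [
"A toy illustration (made-up numbers, not the sea ice data) of what `groupby('month')[...].rank()` does: each value is ranked only against the same calendar month in other years.\n",
"\n",
"```python\n",
"import pandas as pd\n",
"\n",
"toy = pd.DataFrame({'month': [9, 9, 9, 3, 3],\n",
"                    'year': [1979, 1980, 1981, 1979, 1980],\n",
"                    'extent': [7.2, 7.8, 7.0, 16.5, 16.1]})\n",
"# September values rank 2, 3, 1 among themselves; March values rank 2, 1\n",
"toy['rank'] = toy.groupby('month')['extent'].rank()\n",
"```\n"
]
},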
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"add rank column for each month and hemsiphere's max and min:"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n['max'] = add_rank(n['max'], 'extent')\n",
"n['min'] = add_rank(n['min'], 'extent')\n",
"s['max'] = add_rank(s['max'], 'extent')\n",
"s['min'] = add_rank(s['min'], 'extent')\n",
"\n",
"navg['max'] = add_rank(navg['max'], '5day-Avg')\n",
"navg['min'] = add_rank(navg['min'], '5day-Avg')\n",
"savg['max'] = add_rank(savg['max'], '5day-Avg')\n",
"savg['min'] = add_rank(savg['min'], '5day-Avg')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def do_annual_min_max_ranking(df, field):\n",
" min_index = df.groupby(['year'])[field].idxmin()\n",
" max_index = df.groupby(['year'])[field].idxmax()\n",
" mindata = df.loc[min_index][['date', field]]\n",
" mindata['rank'] = mindata[field].rank()\n",
" maxdata = df.loc[max_index][['date', field]]\n",
" maxdata['rank'] = maxdata[field].rank()\n",
"\n",
" mindata = mindata.set_index(pd.to_datetime(mindata.date).dt.year)\n",
" maxdata = maxdata.set_index(pd.to_datetime(maxdata.date).dt.year)\n",
"\n",
" maxdata = maxdata.add_prefix('max_')\n",
" mindata = mindata.add_prefix('min_')\n",
"\n",
" data = pd.concat([mindata, maxdata], axis=1)\n",
" return data\n",
" \n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"It is also desired that we have Annual min/max rank data so revisit the north and south"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"north_annual_by_day = do_annual_min_max_ranking(north, 'extent')\n",
"north_annual_averaged = do_annual_min_max_ranking(north, '5day-Avg')"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"south_annual_by_day = do_annual_min_max_ranking(south, 'extent')\n",
"south_annual_averaged = do_annual_min_max_ranking(south, '5day-Avg')"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>min_date</th>\n",
" <th>min_5day-Avg</th>\n",
" <th>min_rank</th>\n",
" <th>max_date</th>\n",
" <th>max_5day-Avg</th>\n",
" <th>max_rank</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1978</th>\n",
" <td>1978-12-31</td>\n",
" <td>7.596000</td>\n",
" <td>38</td>\n",
" <td>1978-10-28</td>\n",
" <td>17.7245</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1979</th>\n",
" <td>1979-02-19</td>\n",
" <td>2.928333</td>\n",
" <td>25</td>\n",
" <td>1979-09-15</td>\n",
" <td>18.3230</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1980</th>\n",
" <td>1980-02-26</td>\n",
" <td>2.574000</td>\n",
" <td>7</td>\n",
" <td>1980-09-25</td>\n",
" <td>19.0470</td>\n",
" <td>28</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" min_date min_5day-Avg min_rank max_date max_5day-Avg max_rank\n",
"1978 1978-12-31 7.596000 38 1978-10-28 17.7245 2\n",
"1979 1979-02-19 2.928333 25 1979-09-15 18.3230 8\n",
"1980 1980-02-26 2.574000 7 1980-09-25 19.0470 28"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"south_annual_averaged.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"Write out the data frames in a nice format"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import calendar\n",
"month_names = [calendar.month_name[x] for x in range(1,13)]\n",
"\n",
"def swap_column_level_and_sort(df):\n",
" df.columns = df.columns.swaplevel(1,0)\n",
" df = df.sortlevel(0, axis=1)\n",
" return df\n",
"\n",
"# set index to year and month and then broadcast month back across the columns.\n",
"# next swap and sort so that you have the data grouped under the month.\n",
"def prepare_for_csv(df):\n",
" df = df.set_index(['year','month']).unstack('month')\n",
" df = swap_column_level_and_sort(df)\n",
" df.columns = df.columns.set_levels(month_names, level=0)\n",
" return df\n",
"\n",
"\n",
"def write_to_xls(df_list, writer, is_monthly=True):\n",
" for df, sheet in df_list:\n",
" if is_monthly:\n",
" df = prepare_for_csv(df)\n",
" df.to_excel(writer,'{sheet}'.format(sheet=sheet), float_format=\"%.3f\")\n",
"\n",
"\n",
"writer = ExcelWriter('Sea_Ice_Statistics.xls')\n",
"\n",
"monthly_dataframelist =[(navg['min'], 'Northern 5day Min'),\n",
" (navg['max'], 'Northern 5day Max'),\n",
" (savg['min'], 'Southern 5day Min'),\n",
" (savg['max'], 'Southern 5day Max'),\n",
" (n['min'], 'Northern Daily Min'),\n",
" (n['max'], 'Northern Daily Max'),\n",
" (s['min'], 'Southern Daily Min'),\n",
" (s['max'], 'Southern Daily Max')]\n",
"\n",
"annual_dataframelist = [(north_annual_averaged, 'North Annual 5day-avg'),\n",
" (north_annual_by_day, 'North Annual Daily'),\n",
" (south_annual_averaged, 'South Annual 5day-avg'),\n",
" (south_annual_by_day, 'South Annual Daily')]\n",
"\n",
"write_to_xls(monthly_dataframelist, writer, is_monthly=True)\n",
"write_to_xls(annual_dataframelist, writer, is_monthly=False)\n",
"\n",
"writer.save()"
]
},
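{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, a toy sketch of the `unstack`/`swaplevel` reshaping used above (made-up values; `sort_index(axis=1)` is the modern spelling of `sortlevel`): after `set_index(['year', 'month']).unstack('month')` the columns are (variable, month) pairs, and swapping the levels groups every variable under its month.\n",
"\n",
"```python\n",
"import pandas as pd\n",
"\n",
"toy = pd.DataFrame({'year': [1979, 1979, 1980, 1980],\n",
"                    'month': [1, 2, 1, 2],\n",
"                    'extent': [15.9, 16.5, 15.3, 16.0]})\n",
"# pivot month into the columns: columns become ('extent', 1), ('extent', 2)\n",
"wide = toy.set_index(['year', 'month']).unstack('month')\n",
"# put month on top, so each month's variables sit together\n",
"wide.columns = wide.columns.swaplevel(0, 1)\n",
"wide = wide.sort_index(axis=1)\n",
"```\n"
]
},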
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
},
"name": "Sea_Ice_Statistics.ipynb"
},
"nbformat": 4,
"nbformat_minor": 0
}