rsignell-usgs · December 7, 2012 20:12
diff --git a/clim.ipynb b/clim.ipynb
 {
 "metadata": {
  "name": "Manning Climatology Test"
 },
 "nbformat": 3,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "input": "\"\"\"\nRead eMOLT data from either:\n    -remote data on the web (via OPeNDAP)\n    -a local ascii file\n    \nCreated on Sun Nov 18 06:33:34 2012\n\n@author: JiM\n\"\"\"\n#### HARDCODES ###\nsite='BN01'\nsource='opendap' #any other value reads the local ascii file\n##################\n\nfrom datetime import datetime, timedelta\nfrom pandas import Series,read_csv",
     "language": "python",
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "input": "cd /usgs/data1/notebook/oceanography",
     "language": "python",
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "/usgs/data1/notebook/oceanography\n"
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "input": "#step 1: read data\nif source=='opendap':\n  # reading remote data\n  import getdata # assumes you have the \"oceanography modules\" in this directory\n  [datet,temp,u,v,depth,st,et]=getdata.getemolt('BN01')\n  tso=Series(temp,index=datet)\n\nelse: #reading local ascii files\n  #Define a custom date parser\n  def parse(datet):\n    ''' parses a few date columns that we have combined inside read_csv'''  \n    dt = datetime.strptime(datet[0:10],'%Y-%m-%d')\n    delta = timedelta(hours=int(datet[11:13]), minutes= int(datet[13:15]))\n    return dt + delta\n  tso=read_csv(site+'.dat',sep='\\s+', parse_dates={'datet':['yearmthday','hr']}, index_col='datet',date_parser=parse)  #the index_col='datet' means that your index will be datetime\n  tso=tso.temp",
     "language": "python",
     "outputs": [
      {
       "ename": "TypeError",
       "evalue": "getemolt() takes exactly 3 arguments (1 given)",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-7-485300028b72>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      3\u001b[0m   \u001b[1;31m# reading remote data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m   \u001b[1;32mimport\u001b[0m \u001b[0mgetdata\u001b[0m \u001b[1;31m# assumes you have the \"oceanography modules\" in this directory\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m   \u001b[1;33m[\u001b[0m\u001b[0mdatet\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtemp\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mu\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdepth\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mst\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0met\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mgetdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetemolt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'BN01'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      6\u001b[0m   \u001b[0mtso\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mSeries\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdatet\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;31mTypeError\u001b[0m: getemolt() takes exactly 3 arguments (1 given)"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "input": "#step 2: create a daily mean and then remove those with < 18 hours\ntsod=tso.resample('D',how=['count','mean','median','min','max','std'],loffset=timedelta(hours=-12))\ntsod.ix[tsod['count']<18,['mean','median','min','max','std']] = 'NaN'\n#add columns for custom date format\ntsod['yy']=tsod.index.year\ntsod['mm']=tsod.index.month\ntsod['dd']=tsod.index.day\ntsod['mean']=(tsod['mean'].values-32.0)/1.8\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsodp=tsod.reindex(columns=output_fmt)\n#write results to a file\ntsodp.to_csv(site+'_wtmp_da.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')",
     "language": "python",
     "outputs": []
    },
    {
     "cell_type": "code",
     "input": "#Step #3: create a monthly mean\ntsom=tso.resample('m',how=['count','mean','median','min','max','std'],kind='period')\ntsom.ix[tsom['count']<25*24,['mean','median','min','max','std']] = 'NaN'\n#add columns for custom date format\ntsom['yy']=tsom.index.year\ntsom['mm']=tsom.index.month\ntsom['dd']=15\ntsom['mean']=(tsom['mean'].values-32.0)/1.8\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsomp=tsom.reindex(columns=output_fmt)# found I needed to generate a new dataframe to print in this order\ntsomp.to_csv(site+'_wtmp_ma.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')",
     "language": "python",
     "outputs": []
    },
    {
     "cell_type": "code",
     "input": "#Step 4: generate daily climatologies\ndef parseyd(yd):\n    ''' parses yearday column to generate a common year. I set them all to year 2000.'''\n    dt=datetime(2000,1,1)\n    delta = timedelta(days=float(yd))\n    return dt + delta\ntsc=read_csv(\"BN01.dat\",sep='\\s+', parse_dates={'datet':['yd']}, index_col='datet',date_parser=parseyd)  #the index_col='datet' means that your index will be datetime\ntsc=tsc.temp\ntsdc=tsc.resample('D',how=['count','mean','median','min','max','std'],loffset=timedelta(hours=-12))\n#add columns for custom date format\ntsdc['yy']=0\ntsdc['mm']=tsdc.index.month\ntsdc['dd']=tsdc.index.day\ntsdc['mean']=(tsdc['mean'].values-32.0)/1.8\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsdcp=tsdc.reindex(columns=output_fmt)# found I needed to generate a new dataframe to print in this order\ntsdcp.to_csv(site+'_wtmp_dc.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')",
     "language": "python",
     "outputs": []
    },
    {
     "cell_type": "code",
     "input": "#Step 5: generate monthly climatologies\ntsmc=tsdc['mean'].resample('m',how=['count','mean','median','min','max','std'],loffset=timedelta(days=-15))\n#add columns for custom date format\ntsmc['yy']=0\ntsmc['mm']=tsmc.index.month\ntsmc['dd']=0\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsmcp=tsmc.reindex(columns=output_fmt)# found I needed to generate a new dataframe to print in this order\ntsmcp.to_csv(site+'_wtmp_mc.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')\n\n\n  ",
     "language": "python",
     "outputs": []
    }
   ]
  }
 ]
 }
	{
	"metadata": {
	"name": "Manning Climatology Test"
	},
	"nbformat": 3,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "code",
	"input": "\"\"\"\nRead eMOLT data from either:\n -remote data on the web (via OPeNDAP)\n -a local ascii file\n \nCreated on Sun Nov 18 06:33:34 2012\n\n@author: JiM\n\"\"\"\n#### HARDCODES ###\nsite='BN01'\nsource='opendap' #any other value reads the local ascii file\n##################\n\nfrom datetime import datetime, timedelta\nfrom pandas import Series,read_csv",
	"language": "python",
	"outputs": [],
	"prompt_number": 5
	},
	{
	"cell_type": "code",
	"input": "cd /usgs/data1/notebook/oceanography",
	"language": "python",
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": "/usgs/data1/notebook/oceanography\n"
	}
	],
	"prompt_number": 6
	},
	{
	"cell_type": "code",
	"input": "#step 1: read data\nif source=='opendap':\n # reading remote data\n import getdata # assumes you have the \"oceanography modules\" in this directory\n [datet,temp,u,v,depth,st,et]=getdata.getemolt('BN01')\n tso=Series(temp,index=datet)\n\nelse: #reading local ascii files\n #Define a custom date parser\n def parse(datet):\n ''' parses a few date columns that we have combined inside read_csv''' \n dt = datetime.strptime(datet[0:10],'%Y-%m-%d')\n delta = timedelta(hours=int(datet[11:13]), minutes= int(datet[13:15]))\n return dt + delta\n tso=read_csv(site+'.dat',sep='\\s+', parse_dates={'datet':['yearmthday','hr']}, index_col='datet',date_parser=parse) #the index_col='datet' means that your index will be datetime\n tso=tso.temp",
	"language": "python",
	"outputs": [
	{
	"ename": "TypeError",
	"evalue": "getemolt() takes exactly 3 arguments (1 given)",
	"output_type": "pyerr",
	"traceback": [
	"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
	"\u001b[1;32m<ipython-input-7-485300028b72>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# reading remote data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mgetdata\u001b[0m \u001b[1;31m# assumes you have the \"oceanography modules\" in this directory\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[1;33m[\u001b[0m\u001b[0mdatet\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtemp\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mu\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdepth\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mst\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0met\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mgetdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetemolt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'BN01'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6\u001b[0m \u001b[0mtso\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mSeries\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdatet\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
	"\u001b[1;31mTypeError\u001b[0m: getemolt() takes exactly 3 arguments (1 given)"
	]
	}
	],
	"prompt_number": 7
	},
	{
	"cell_type": "code",
	"input": "#step 2: create a daily mean and then remove those with < 18 hours\ntsod=tso.resample('D',how=['count','mean','median','min','max','std'],loffset=timedelta(hours=-12))\ntsod.ix[tsod['count']<18,['mean','median','min','max','std']] = 'NaN'\n#add columns for custom date format\ntsod['yy']=tsod.index.year\ntsod['mm']=tsod.index.month\ntsod['dd']=tsod.index.day\ntsod['mean']=(tsod['mean'].values-32.0)/1.8\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsodp=tsod.reindex(columns=output_fmt)\n#write results to a file\ntsodp.to_csv(site+'_wtmp_da.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')",
	"language": "python",
	"outputs": []
	},
	{
	"cell_type": "code",
	"input": "#Step #3: create a monthly mean\ntsom=tso.resample('m',how=['count','mean','median','min','max','std'],kind='period')\ntsom.ix[tsom['count']<25*24,['mean','median','min','max','std']] = 'NaN'\n#add columns for custom date format\ntsom['yy']=tsom.index.year\ntsom['mm']=tsom.index.month\ntsom['dd']=15\ntsom['mean']=(tsom['mean'].values-32.0)/1.8\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsomp=tsom.reindex(columns=output_fmt)# found I needed to generate a new dataframe to print in this order\ntsomp.to_csv(site+'_wtmp_ma.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')",
	"language": "python",
	"outputs": []
	},
	{
	"cell_type": "code",
	"input": "#Step 4: generate daily climatologies\ndef parseyd(yd):\n ''' parses yearday column to generate a common year. I set them all to year 2000.'''\n dt=datetime(2000,1,1)\n delta = timedelta(days=float(yd))\n return dt + delta\ntsc=read_csv(\"BN01.dat\",sep='\\s+', parse_dates={'datet':['yd']}, index_col='datet',date_parser=parseyd) #the index_col='datet' means that your index will be datetime\ntsc=tsc.temp\ntsdc=tsc.resample('D',how=['count','mean','median','min','max','std'],loffset=timedelta(hours=-12))\n#add columns for custom date format\ntsdc['yy']=0\ntsdc['mm']=tsdc.index.month\ntsdc['dd']=tsdc.index.day\ntsdc['mean']=(tsdc['mean'].values-32.0)/1.8\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsdcp=tsdc.reindex(columns=output_fmt)# found I needed to generate a new dataframe to print in this order\ntsdcp.to_csv(site+'_wtmp_dc.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')",
	"language": "python",
	"outputs": []
	},
	{
	"cell_type": "code",
	"input": "#Step 5: generate monthly climatologies\ntsmc=tsdc['mean'].resample('m',how=['count','mean','median','min','max','std'],loffset=timedelta(days=-15))\n#add columns for custom date format\ntsmc['yy']=0\ntsmc['mm']=tsmc.index.month\ntsmc['dd']=0\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsmcp=tsmc.reindex(columns=output_fmt)# found I needed to generate a new dataframe to print in this order\ntsmcp.to_csv(site+'_wtmp_mc.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')\n\n\n ",
	"language": "python",
	"outputs": []
	}
	]
	}
	]
	}