Skip to content

Instantly share code, notes, and snippets.

@rsignell-usgs
Created December 7, 2012 20:12
Show Gist options
  • Save rsignell-usgs/4236149 to your computer and use it in GitHub Desktop.
Save rsignell-usgs/4236149 to your computer and use it in GitHub Desktop.
Jim Manning climatology test
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "Manning Climatology Test"
},
"nbformat": 3,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"input": "\"\"\"\nRead eMOLT data either:\n -remote data on the web (via OPeNDAP)\n -from a local ascii file\n \nCreated on Sun Nov 18 06:33:34 2012\n\n@author: JiM\n\"\"\"\n#### HARDCODES ###\nsite='BN01'\nsource='opendap' #any other value reads the local ascii file\n##################\n\nfrom datetime import datetime, timedelta\nfrom pandas import Series,read_csv",
"language": "python",
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"input": "cd /usgs/data1/notebook/oceanography",
"language": "python",
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "/usgs/data1/notebook/oceanography\n"
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"input": "#step 1: read data\nif source=='opendap':\n # reading remote data\n import getdata # assumes you have the \"oceanography modules\" in this directory\n [datet,temp,u,v,depth,st,et]=getdata.getemolt('BN01')\n tso=Series(temp,index=datet)\n\nelse: #reading local ascii files\n #Define a custom date parser\n def parse(datet):\n ''' parses a few date columns that we have combined inside read_csv''' \n dt = datetime.strptime(datet[0:10],'%Y-%m-%d')\n delta = timedelta(hours=int(datet[11:13]), minutes= int(datet[13:15]))\n return dt + delta\n tso=read_csv(site+'.dat',sep='\\s+', parse_dates={'datet':['yearmthday','hr']}, index_col='datet',date_parser=parse) #the index_col='datet' means that your index will be datetime\n tso=tso.temp",
"language": "python",
"outputs": [
{
"ename": "TypeError",
"evalue": "getemolt() takes exactly 3 arguments (1 given)",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-7-485300028b72>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# reading remote data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mgetdata\u001b[0m \u001b[1;31m# assumes you have the \"oceanography modules\" in this directory\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[1;33m[\u001b[0m\u001b[0mdatet\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtemp\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mu\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdepth\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mst\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0met\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mgetdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetemolt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'BN01'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6\u001b[0m \u001b[0mtso\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mSeries\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdatet\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: getemolt() takes exactly 3 arguments (1 given)"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"input": "#step 2: create a daily mean and then remove those with < 18 hours\ntsod=tso.resample('D',how=['count','mean','median','min','max','std'],loffset=timedelta(hours=-12))\ntsod.ix[tsod['count']<18,['mean','median','min','max','std']] = 'NaN'\n#add columns for custom date format\ntsod['yy']=tsod.index.year\ntsod['mm']=tsod.index.month\ntsod['dd']=tsod.index.day\ntsod['mean']=(tsod['mean'].values-32.0)/1.8\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsodp=tsod.reindex(columns=output_fmt)\n#write results to a file\ntsodp.to_csv(site+'_wtmp_da.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')",
"language": "python",
"outputs": []
},
{
"cell_type": "code",
"input": "#Step #3: create a monthly mean\ntsom=tso.resample('m',how=['count','mean','median','min','max','std'],kind='period')\ntsom.ix[tsom['count']<25*24,['mean','median','min','max','std']] = 'NaN'\n#add columns for custom date format\ntsom['yy']=tsom.index.year\ntsom['mm']=tsom.index.month\ntsom['dd']=15\ntsom['mean']=(tsom['mean'].values-32.0)/1.8\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsomp=tsom.reindex(columns=output_fmt)# found I needed to generate a new dataframe to print in this order\ntsomp.to_csv(site+'_wtmp_ma.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')",
"language": "python",
"outputs": []
},
{
"cell_type": "code",
"input": "#Step 4: generate daily climatologies\ndef parseyd(yd):\n ''' parses yearday column to generate a common year. I set them all to year 2000.'''\n dt=datetime(2000,1,1)\n delta = timedelta(days=float(yd))\n return dt + delta\ntsc=read_csv(\"BN01.dat\",sep='\\s+', parse_dates={'datet':['yd']}, index_col='datet',date_parser=parseyd) #the index_col='datet' means that your index will be datetime\ntsc=tsc.temp\ntsdc=tsc.resample('D',how=['count','mean','median','min','max','std'],loffset=timedelta(hours=-12))\n#add columns for custom date format\ntsdc['yy']=0\ntsdc['mm']=tsdc.index.month\ntsdc['dd']=tsdc.index.day\ntsdc['mean']=(tsdc['mean'].values-32.0)/1.8\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsdcp=tsdc.reindex(columns=output_fmt)# found I needed to generate a new dataframe to print in this order\ntsdcp.to_csv(site+'_wtmp_dc.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')",
"language": "python",
"outputs": []
},
{
"cell_type": "code",
"input": "#Step 5: generate monthly climatologies\ntsmc=tsdc['mean'].resample('m',how=['count','mean','median','min','max','std'],loffset=timedelta(days=-15))\n#add columns for custom date format\ntsmc['yy']=0\ntsmc['mm']=tsmc.index.month\ntsmc['dd']=0\noutput_fmt=['yy','mm','dd','count','mean','median','min','max','std']\ntsmcp=tsmc.reindex(columns=output_fmt)# found I needed to generate a new dataframe to print in this order\ntsmcp.to_csv(site+'_wtmp_mc.csv',index=False,header=False,na_rep='NaN',float_format='%10.2f')\n\n\n ",
"language": "python",
"outputs": []
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment