Last active
December 16, 2015 06:08
-
-
Save j08lue/5388938 to your computer and use it in GitHub Desktop.
Automatize the download of eSurge data from vannstand.no
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Automatize the download of eSurge data from vannstand.no | |
Troubleshooting | |
=============== | |
If the script no longer works, the most likely cause is a
change in the website's data request form.
Most sensitive are the form field names (e.g. 'tid', 'aar', etc.).
Check the source code of the website to see the current names.
Note | |
==== | |
The website is | |
http://vannstand.no/index.php/nb/vannstandsdata/malt-vannstand | |
but the data request form is actually in an iframe with src | |
http://vannstand.no/vannscripts/skjema2Ny.php | |
""" | |
import mechanize | |
import datetime | |
import copy | |
import os.path | |
# we need a dictionary with places and their order in the drop-down menu
# options for 'sted' in the HTML code: | |
""" | |
<option value="1">Andenes</option> | |
<option value="2">Bergen</option> | |
<option value="3">Bodø</option> | |
<option value="4">Hammerfest</option> | |
<option value="5">Harstad</option> | |
<option value="6">Heimsjø</option> | |
<option value="7">Helgeroa</option> | |
<option value="8">Honningsvåg</option> | |
<option value="9">Kabelvåg</option> | |
<option value="10">Kristiansund</option> | |
<option value="11">Måløy</option> | |
<option value="12">Narvik</option> | |
<option value="13">Ny-Ålesund</option> | |
<option value="14">Oscarsborg</option> | |
<option value="15">Oslo</option> | |
<option value="16">Rørvik</option> | |
<option value="17">Stavanger</option> | |
<option value="18">Tregde</option> | |
<option value="19">Tromsø</option> | |
<option value="20">Trondheim</option> | |
<option value="21">Vardø</option> | |
<option value="22">Viker</option> | |
<option value="23">Ålesund</option> | |
""" | |
# type that into a dictionary: | |
# Lookup table from the form's 'sted' option value (the strings '1'..'23')
# to an ASCII spelling of the station name.  The ASCII name becomes part of
# the output file name written by download_from_vannstand(), so the
# spellings below must stay exactly as they are.
# The list is ordered like the <option> entries of the drop-down menu;
# enumerate() starting at 1 reproduces the option values.
location_names = {
    str(option_value): ascii_name
    for option_value, ascii_name in enumerate([
        'Andenes',
        'Bergen',
        'Bodoe',
        'Hammerfest',
        'Harstad',
        'Heimsjoe',
        'Helgeroa',
        'Honningsvaeg',
        'Kabelvaeg',
        'Kristiansund',
        'Maaloey',
        'Narvik',
        'NyAalesund',
        'Oscarsborg',
        'Oslo',
        'Roervik',
        'Stavanger',
        'Tregde',
        'Tromsoe',
        'Trondheim',
        'Vardoe',
        'Viker',
        'Aalesund',
    ], 1)
}
def download_from_vannstand(options={},
                            defaults=dict(
                                sted=['1'],
                                tid=['01'],
                                mnd=['1'],
                                aar=['1992'],
                                antall_dager=['30'],
                                nivaa=['2'],
                                typedata=['1']),
                            starturl='http://vannstand.no/vannscripts/skjema2Ny.php',
                            outDir='.'):
    """Download water level data from vannstand.no

    Fills in the first form found at *starturl*, submits it, follows the
    'Last ned fil' link and then the link to the data file, and saves
    that file as ``eSurge_<location>_<YYYYMMDD>_<N>days.dat`` in *outDir*.

    Parameters
    ==========
    options : dict
        options to be filled into website form, keyed by form field name.
        Values may be single items or lists; single items are wrapped in
        a list before they are assigned to the form.
    defaults : dict
        defaults for options. Are overwritten with items in options
    starturl : str
        url to website
    outDir : str
        output directory

    Raises
    ======
    KeyError
        if the selected 'sted' value has no entry in location_names
    ValueError
        if 'aar', 'mnd' and 'tid' do not form a valid date

    Notes
    =====
    Neither *options* nor *defaults* is modified, so the mutable default
    values in the signature are safe to share between calls.
    Errors raised by the browser (network failure, missing form or link)
    are not caught here.
    """
    # combine defaults and options into a new dict (options win);
    # the arguments themselves are left untouched
    allopts = dict(defaults)
    allopts.update(options or {})

    # the form controls are assigned lists, so wrap any bare value
    allopts = dict(
        (key, opt if isinstance(opt, list) else [opt])
        for key, opt in allopts.items())

    # open browser
    br = mechanize.Browser()
    try:
        br.open(starturl)

        # select the form and make all of its fields writable
        br.select_form(nr=0)
        br.form.set_all_readonly(False)

        # set options
        for key, opt in allopts.items():
            br[key] = opt

        # submit request
        print('Requesting data ...')
        br.submit()

        # get download site link ('Last ned fil' is Norwegian for
        # 'Download file')
        print('Browsing to download site ...')
        dwnld_link = br.find_link(text='Last ned fil')
        br.follow_link(dwnld_link)

        # get file link
        # NOTE: '\xc3\xa6' is the UTF-8 byte sequence of the letter 'ae'
        # ligature as it appears in a Python 2 byte string.
        print('Finding link to file ...')
        file_link = br.find_link(
            text_regex='Beregnet tidevann, observert vannstand og v\xc3\xa6rets virkning')

        # define output file name from location, start date and length
        loc = location_names[allopts['sted'][0]]
        date = datetime.datetime(year=int(allopts['aar'][0]),
                                 month=int(allopts['mnd'][0]),
                                 day=int(allopts['tid'][0]))
        datestr = date.strftime('%Y%m%d')
        fname = 'eSurge_{}_{}_{}days.dat'.format(
            loc, datestr, allopts['antall_dager'][0])
        fpath = os.path.join(outDir, fname)

        # download file; binary mode stores the response bytes verbatim
        # (text mode may translate newlines on some platforms)
        print('Downloading data to file {} ...'.format(fpath))
        resp = br.follow_link(file_link)
        with open(fpath, 'wb') as fout:
            fout.write(resp.read())
    finally:
        # release the browser even if one of the steps above failed
        br.close()
def run_in_intervals(locations=['23', '7'],
                     firstdate=datetime.datetime(1992, 1, 1),
                     lastdate=datetime.datetime(2012, 12, 31),
                     interval=datetime.timedelta(days=30),
                     **kwargs):
    """Run the data download from *firstdate* to *lastdate* in steps of *interval*

    For every location, download_from_vannstand() is called once per step
    with the step's start date and ``interval.days`` as number of days.
    A step is started as long as its start date is not after *lastdate*.

    Parameters
    ==========
    locations : list of str
        water level locations (see top of this script)
        make sure that names are set in location_names
    firstdate : datetime.datetime
        start date of the first request
    lastdate : datetime.datetime
        latest date at which a request may start
    interval : datetime.timedelta
        step between two requests; must be positive
    **kwargs : optional keyword arguments
        passed on to download_from_vannstand()

    Raises
    ======
    ValueError
        if *interval* is zero or negative (the date would never pass
        *lastdate* and the loop would not terminate)
    """
    if interval <= datetime.timedelta(0):
        raise ValueError(
            'interval must be a positive timedelta, got {!r}'.format(interval))

    # number of days requested per download
    ndays = '{}'.format(interval.days)

    for loc in locations:
        print('Downloading data for')
        print('... location: {}'.format(loc))
        # datetime objects are immutable, so no copy is needed to restart
        date = firstdate
        while date <= lastdate:
            print('... date: {}'.format(date))
            # fresh form options for this request
            options = {
                'antall_dager': ndays,
                'sted': loc,
                'aar': '{}'.format(date.year),
                'mnd': '{}'.format(date.month),
                'tid': '{:02d}'.format(date.day),
            }
            # run download
            download_from_vannstand(options=options, **kwargs)
            date += interval
if __name__ == "__main__":
    # Script entry point: write everything to the current directory.
    target_dir = '.'
    # First the batch download with the default locations and date range ...
    run_in_intervals(outDir=target_dir)
    # ... then one single request using the default form options.
    download_from_vannstand(outDir=target_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment