Last active
October 13, 2015 13:27
-
-
Save cynici/4202171 to your computer and use it in GitHub Desktop.
Harvest Wunderground.com weather stations in a specific country
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import os
import string
import sys
import urllib2
from optparse import OptionParser

import simplejson
# Usage/help text handed to OptionParser; "%prog" is expanded by optparse.
# The JSON sample mirrors what main() writes (indent=4, keys sorted).
help_text = """usage: %prog [options] ISO_country_code ...
Use Wunderground.com autocomplete API to harvest weather stations
in the specified country(s). Details: http://www.wunderground.com/weather/api/
Output format:
[{
    "c": "AO",
    "l": "/q/zmw:00000.3.66270",
    "name": "AO Wako Kungo Angola",
    "type": "city",
    "tz": "Africa/Luanda",
    "tzs": "WAT",
    "zmw": "00000.3.66270"
}]"""

# URL template for one autocomplete query. It is filled with a mapping that
# provides "alphabet" (the query prefix) and "country_code".
autocomplete_url = """http://autocomplete.wunderground.com/aq?query=%(alphabet)s&c=%(country_code)s"""

# Sample request and response, kept for reference:
#
# http://autocomplete.wunderground.com/aq?query=A&c=CL
# { "RESULTS": [
#   { "name": "Antofagasta, Chile", "type": "city", "c": "CL", "zmw": "00000.1.85442", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.1.85442" },
#   { "name": "Arica, Chile", "type": "city", "c": "CL", "zmw": "00000.1.85406", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.1.85406" },
#   { "name": "Angol, Chile", "type": "city", "c": "CL", "zmw": "00000.5.85703", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.5.85703" },
#   { "name": "Ancud, Chile", "type": "city", "c": "CL", "zmw": "00000.10.85799", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.10.85799" },
#   { "name": "Arauco, Chile", "type": "city", "c": "CL", "zmw": "00000.6.85682", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.6.85682" },
#   { "name": "Almirante Schroeders, Chile", "type": "city", "c": "CL", "zmw": "00000.7.85934", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.7.85934" },
#   { "name": "Alto Palena, Chile", "type": "city", "c": "CL", "zmw": "00000.1.85836", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.1.85836" },
#   { "name": "ANF, Chile", "type": "city", "c": "CL", "zmw": "00000.5.85442", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.5.85442" },
#   { "name": "ARI, Chile", "type": "city", "c": "CL", "zmw": "00000.5.85406", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.5.85406" },
#   { "name": "ARR, Chile", "type": "city", "c": "CL", "zmw": "00000.5.85864", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.5.85864" },
#   { "name": "Arturo Merino Benitez International, Chile", "type": "city", "c": "CL", "zmw": "00000.7.85574", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.7.85574" }
# ] }
def _harvest_country(country_code, logger):
    """Collect the stations of one country from the autocomplete API.

    Issues one request per uppercase ASCII letter (A-Z) and gathers every
    result whose 'l' field contains '/q/'. Each kept station has its 'name'
    rewritten to "<c> <name without commas>".

    Returns a dict mapping the station's query key (its 'l' field) to the
    station dict, so a station seen more than once is stored only once.

    Any error while fetching or parsing a response is logged and re-raised.
    """
    stations = {}
    for alphabet in string.ascii_uppercase:
        # Build the URL outside the try so the error handler can always use it.
        url = autocomplete_url % dict(country_code=country_code.upper(), alphabet=alphabet)
        logger.debug("GET %s ...", url)
        try:
            resp = urllib2.urlopen(url)
            try:
                raw = resp.read()
            finally:
                resp.close()
            # Decode as ISO-8859-1 rather than UTF-8; see
            # http://stackoverflow.com/questions/6180521/unicodedecodeerror-utf8-codec-cant-decode-bytes-in-position-3-6-invalid-dat
            d = simplejson.loads(raw.decode("ISO-8859-1"))
            # Each entry of d['RESULTS'] is a station dict, e.g.
            #   {'c': 'MW',
            #    'l': '/q/zmw:00000.1.67797',
            #    'name': 'Makanga, Malawi',
            #    'type': 'city',
            #    'tz': 'Africa/Blantyre',
            #    'tzs': 'CAT',
            #    'zmw': '00000.1.67797'}
            for stn_dict in d['RESULTS']:
                if 'l' not in stn_dict or '/q/' not in stn_dict['l']:
                    continue
                # Prepend country code to station name
                stn_dict['name'] = '%s %s' % (stn_dict['c'], stn_dict['name'].replace(',', ''))
                # Key by the unique query-key to avoid duplicates
                stations[stn_dict['l']] = stn_dict
        except Exception as err:
            logger.error("No result for '%s': %s", url, err)
            raise
    return stations


def _save_stations(stations, ofp):
    """Write the station dicts to *ofp* as a JSON list; return how many were written.

    Stations are emitted in sorted query-key order so repeated runs produce
    the same file for the same data.
    """
    result = [stations[key] for key in sorted(stations)]
    with open(ofp, 'w') as ofh:
        ofh.write(simplejson.dumps(result, indent=4, sort_keys=True))
    return len(result)


def main(argv=None):
    """Harvest weather stations for each ISO country code on the command line.

    argv: full argument vector including the program name; defaults to
        sys.argv. Options are parsed from argv[1:].

    For every country code, one "<country_code>.json" file is written into
    the output directory (-o/--output-dir, default: current directory) when
    at least one station was found.

    Returns 0. A failure to write an output file is logged and does not
    change the return value.

    Raises AssertionError when --loglevel is not a known level name, and
    re-raises any error from fetching or parsing an API response. Invalid
    command lines exit through OptionParser.error().
    """
    if argv is None:
        argv = sys.argv

    debuglevelD = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'critical': logging.CRITICAL,
    }

    parser = OptionParser(help_text)
    parser.add_option("-l", "--loglevel", dest="loglevel", type="string",
                      help="Verbosity %s" % sorted(debuglevelD), metavar='LOGLEVEL')
    parser.add_option("-o", "--output-dir", dest="output_dir", type="string", metavar='DIR',
                      help="Output directory. Default is the current directory.")
    # Honour the argv parameter instead of silently re-reading sys.argv.
    (options, args) = parser.parse_args(argv[1:])

    if options.loglevel:
        if options.loglevel not in debuglevelD:
            raise AssertionError("Verbosity level must be one of: %s" % sorted(debuglevelD))
        dbglvl = debuglevelD[options.loglevel]
    else:
        dbglvl = logging.WARNING

    logger = logging.getLogger()
    logger.setLevel(dbglvl)
    ch = logging.StreamHandler()
    ch.setFormatter(logging.Formatter(
        '%(asctime)s %(lineno)d %(name)s %(funcName)s - %(levelname)s - %(message)s'))
    ch.setLevel(dbglvl)
    logger.addHandler(ch)

    if not args:
        parser.error("Requires at least one ISO country code.")

    output_dir = options.output_dir
    if output_dir:
        if not os.path.isdir(output_dir):
            parser.error("Invalid output directory: %s" % output_dir)
    else:
        output_dir = os.getcwd()

    for country_code in args:
        stations = _harvest_country(country_code, logger)
        if not stations:
            logger.info("No usable stations in country '%s'", country_code)
            continue

        # Compute the path before the try so the error message can name it.
        ofp = os.path.join(output_dir, country_code + '.json')
        try:
            count = _save_stations(stations, ofp)
        except Exception as err:
            # Best effort: report the failure and carry on with the next country.
            logger.error("Failed to save output to %s: %s", ofp, err)
        else:
            logger.info("Saved %d stations to %s", count, ofp)
    return 0
# Run the harvester only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment