Skip to content

Instantly share code, notes, and snippets.

@cynici
Last active October 13, 2015 13:27
Show Gist options
  • Save cynici/4202171 to your computer and use it in GitHub Desktop.
Save cynici/4202171 to your computer and use it in GitHub Desktop.
Harvest Wunderground.com weather stations in a specific country
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os, sys
from optparse import OptionParser
import string
import urllib2
import simplejson
import logging
# Usage/help text handed to OptionParser in main(); optparse expands "%prog"
# to the script name. The "Output format" section shows the shape of one
# station record as it is written to the per-country JSON file.
help_text = """usage: %prog [options] ISO_country_code ...
Use Wunderground.com autocomplete API to harvest weather stations
in the specified country(s). Details: http://www.wunderground.com/weather/api/
Output format:
[{
"c": "AO",
"l": "/q/zmw:00000.3.66270",
"name": "AO Wako Kungo Angola",
"type": "city",
"tz": "Africa/Luanda",
"tzs": "WAT",
"zmw": "00000.3.66270"
}]"""
# URL template for the autocomplete endpoint. main() fills it with a dict
# supplying "alphabet" (the query prefix) and "country_code".
autocomplete_url = """http://autocomplete.wunderground.com/aq?query=%(alphabet)s&c=%(country_code)s"""
# Reference sample: an example request URL followed by the raw JSON it
# returned. This bare string literal is never assigned or read by the code.
"""
http://autocomplete.wunderground.com/aq?query=A&c=CL
{ "RESULTS": [ { "name": "Antofagasta, Chile", "type": "city", "c": "CL", "zmw": "00000.1.85442", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.1.85442" }, { "name": "Arica, Chile", "type": "city", "c": "CL", "zmw": "00000.1.85406", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.1.85406" }, { "name": "Angol, Chile", "type": "city", "c": "CL", "zmw": "00000.5.85703", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.5.85703" }, { "name": "Ancud, Chile", "type": "city", "c": "CL", "zmw": "00000.10.85799", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.10.85799" }, { "name": "Arauco, Chile", "type": "city", "c": "CL", "zmw": "00000.6.85682", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.6.85682" }, { "name": "Almirante Schroeders, Chile", "type": "city", "c": "CL", "zmw": "00000.7.85934", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.7.85934" }, { "name": "Alto Palena, Chile", "type": "city", "c": "CL", "zmw": "00000.1.85836", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.1.85836" }, { "name": "ANF, Chile", "type": "city", "c": "CL", "zmw": "00000.5.85442", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.5.85442" }, { "name": "ARI, Chile", "type": "city", "c": "CL", "zmw": "00000.5.85406", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.5.85406" }, { "name": "ARR, Chile", "type": "city", "c": "CL", "zmw": "00000.5.85864", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.5.85864" }, { "name": "Arturo Merino Benitez International, Chile", "type": "city", "c": "CL", "zmw": "00000.7.85574", "tz": "America/Santiago", "tzs": "CLST", "l": "/q/zmw:00000.7.85574" } ] }
"""
def _fetch_stations(country_code, logger):
    """Query the autocomplete API for one country and collect its stations.

    One request is issued per uppercase ASCII letter, used as the query
    prefix. Each usable result looks like::

        {'c': 'MW', 'l': '/q/zmw:00000.1.67797', 'name': 'Makanga, Malawi',
         'type': 'city', 'tz': 'Africa/Blantyre', 'tzs': 'CAT',
         'zmw': '00000.1.67797'}

    Args:
        country_code: ISO country code; upper-cased before being sent.
        logger: logger used for debug and error messages.

    Returns:
        Dict mapping each station's query key (its ``'l'`` value) to the
        station dict, whose ``'name'`` has been rewritten to
        ``"<country> <name without commas>"``.

    Raises:
        Exception: any error while fetching, decoding or processing a
            response is logged and re-raised unchanged.
    """
    stations = {}
    # The original loop was left hard-coded to the single letter 'I' (a
    # debugging leftover); iterate over the whole alphabet as intended.
    for alphabet in string.ascii_uppercase:
        # Build the URL outside the try so the error message below can
        # always reference it.
        url = autocomplete_url % dict(country_code=country_code.upper(), alphabet=alphabet)
        logger.debug("GET %s ...", url)
        try:
            resp = urllib2.urlopen(url)
            try:
                raw = resp.read()
            finally:
                # Release the connection even when the read fails.
                resp.close()
            # The payload is not reliably UTF-8; ISO-8859-1 decodes any byte.
            # http://stackoverflow.com/questions/6180521/unicodedecodeerror-utf8-codec-cant-decode-bytes-in-position-3-6-invalid-dat
            d = simplejson.loads(raw.decode("ISO-8859-1"))
            for stn_dict in d['RESULTS']:
                # Skip entries that carry no '/q/...' query key.
                if 'l' not in stn_dict or '/q/' not in stn_dict['l']:
                    continue
                # Prepend country code to station name
                stn_dict['name'] = '%s %s' % (stn_dict['c'], stn_dict['name'].replace(',', ''))
                # Key by the unique query key so that a station returned for
                # several prefixes is stored only once.
                stations[stn_dict['l']] = stn_dict
        except Exception as err:
            logger.error("No result for '%s': %s", url, err)
            raise
    return stations


def _save_stations(stations, ofp, logger):
    """Write the station dicts to ``ofp`` as an indented JSON list.

    Args:
        stations: dict of query key -> station dict.
        ofp: path of the output file.
        logger: logger used for info and error messages.

    Returns:
        True when the file was written, False when writing failed (the
        failure is logged, not raised).
    """
    # Order by query key so repeated runs produce the same file.
    result = [stations[key] for key in sorted(stations)]
    try:
        # The context manager closes the file even if the write fails.
        with open(ofp, 'w') as ofh:
            ofh.write(simplejson.dumps(result, indent=4, sort_keys=True))
    except Exception as err:
        # Best effort: a failure for one country must not abort the rest.
        logger.error("Failed to save output to %s: %s", ofp, err)
        return False
    logger.info("Saved %d stations to %s", len(result), ofp)
    return True


def main(argv=None):
    """Harvest weather stations for each country code on the command line.

    For every ISO country code given, the stations found are written to
    ``<output_dir>/<country_code>.json``.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 1 if the output for at least one country could not
        be saved.

    Raises:
        SystemExit: via ``parser.error`` for an unknown log level, a missing
            country code or an invalid output directory.
        Exception: errors raised while querying the API are logged and
            propagated.
    """
    if argv is None:
        argv = sys.argv
    debuglevelD = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'critical': logging.CRITICAL,
    }
    level_names = sorted(debuglevelD)

    parser = OptionParser(help_text)
    parser.add_option("-l", "--loglevel", dest="loglevel", type="string",
                      help="Verbosity %s" % level_names, metavar='LOGLEVEL')
    parser.add_option("-o", "--output-dir", dest="output_dir", type="string", metavar='DIR',
                      help="Output directory. Default is the current directory.")
    # Honour the argv parameter (argv[0] is the program name) instead of
    # silently falling back to sys.argv.
    (options, args) = parser.parse_args(argv[1:])

    if options.loglevel:
        if options.loglevel not in debuglevelD:
            # Report bad input like every other usage error in this function.
            parser.error("Verbosity level must be one of: %s" % level_names)
        dbglvl = debuglevelD[options.loglevel]
    else:
        dbglvl = logging.WARNING

    # Configure the root logger to emit to a stream handler at the chosen level.
    logger = logging.getLogger()
    logger.setLevel(dbglvl)
    ch = logging.StreamHandler()
    ch.setFormatter(logging.Formatter('%(asctime)s %(lineno)d %(name)s %(funcName)s - %(levelname)s - %(message)s'))
    ch.setLevel(dbglvl)
    logger.addHandler(ch)

    if not args:
        parser.error("Requires at least one ISO country code.")

    output_dir = options.output_dir
    if output_dir:
        if not os.path.isdir(output_dir):
            parser.error("Invalid output directory: %s" % output_dir)
    else:
        output_dir = os.getcwd()

    exit_status = 0
    for country_code in args:
        stations = _fetch_stations(country_code, logger)
        if not stations:
            logger.info("No usable stations in country '%s'", country_code)
            continue
        ofp = os.path.join(output_dir, country_code + '.json')
        if not _save_stations(stations, ofp, logger):
            # Keep processing the remaining countries, but report the failure
            # through the exit status.
            exit_status = 1
    return exit_status
# Script entry point: run main() and hand its return value to the shell as
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment