Created
July 18, 2012 13:27
-
-
Save themiurgo/3136205 to your computer and use it in GitHub Desktop.
A Google Geocoder with a MongoDB memoizer. For the ratelim module, look at https://gist.github.com/3006305
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import functools
import sys
import textwrap
import time

import pymongo
import requests

import ratelim
# Status codes the geocoder reports in the ``Status.code`` field of a response.
G_GEO_SUCCESS = 200
G_GEO_SERVER_ERROR = 500
G_GEO_MISSING_QUERY = 601
G_GEO_UNKNOWN_ADDRESS = 602
G_GEO_UNAVAILABLE_ADDRESS = 603
G_GEO_BAD_KEY = 610
G_GEO_TOO_MANY_QUERIES = 620

# Human-readable explanation for each status code above.  The embedded
# newlines are part of the messages and are kept exactly as they were.
ERR_MESSAGES = {
    G_GEO_SUCCESS: (
        "No errors occurred; the address was\n"
        "successfully parsed and its geocode was returned."
    ),
    G_GEO_SERVER_ERROR: (
        "A geocoding or directions request\n"
        "could not be successfully processed, yet the exact reason for the failure\n"
        "is unknown."
    ),
    G_GEO_MISSING_QUERY: (
        "An empty address was specified in the HTTP q parameter."
    ),
    G_GEO_UNKNOWN_ADDRESS: (
        "No corresponding geographic location could be found for the specified address,\n"
        "possibly because the address is relatively new, or because it may be incorrect."
    ),
    G_GEO_UNAVAILABLE_ADDRESS: (
        "The geocode for the given\n"
        "address or the route for the given directions query cannot be returned due\n"
        "to legal or contractual reasons."
    ),
    G_GEO_BAD_KEY: (
        "The given key is either invalid or does\n"
        "not match the domain for which it was given."
    ),
    G_GEO_TOO_MANY_QUERIES: (
        "The given key has gone over the\n"
        "requests limit in the 24 hour period or has submitted too many requests in\n"
        "too short a period of time. If you're sending multiple requests in parallel\n"
        "or in a tight loop, use a timer or pause in your code to make sure you\n"
        "don't send the requests too quickly."
    ),
}
class MongoDict(collections.MutableMapping): | |
"""A dictionary which stores values in a MongoDB collection.""" | |
def __init__(self, hostname, port, dbname, collection, keyname, *args, **kwargs): | |
self.store = getattr(pymongo.Connection(hostname, port)[dbname], | |
keyname) | |
self.update(dict(*args, **kwargs)) # use the free update to set keys | |
def __getitem__(self, key): | |
result = self.store.find_one({"query": self._transformkey(key)}) | |
if not result: | |
raise KeyError | |
return result['value'] | |
def __setitem__(self, key, value): | |
element = {} | |
element['query'] = self._transformkey(key) | |
element['value'] = value | |
self.store.insert(element) | |
def __delitem__(self, key): | |
raise NotImplementedError | |
def __iter__(self): | |
return self.store.find() | |
def __len__(self): | |
return self.store.find().count() | |
# This dictionary has case-unsensitive keys | |
def _transformkey(self, key): | |
try: | |
return key.lower() | |
except: | |
return key | |
try:  # Python 3.3+ (the alias in ``collections`` was removed in 3.10)
    from collections.abc import Hashable as _Hashable
except ImportError:  # Python 2
    from collections import Hashable as _Hashable


class mongomemoized(object):
    """Decorator. Caches a function's return value each time it is called.

    If the decorated function is called later with the same argument, the
    cached value is returned (not reevaluated).  The cache is a ``MongoDict``;
    the constructor arguments are forwarded to it unchanged.

    The decorated function must take exactly one positional argument.  Any
    returned value is cached, ``None`` included.
    """

    def __init__(self, hostname, port, dbname, collection, keyname):
        self.cache = MongoDict(hostname, port, dbname, collection, keyname)
        # Most recently decorated function; filled in by __call__.
        self.func = None

    def __call__(self, f):
        """Return a caching wrapper around the one-argument function *f*."""
        self.func = f

        @functools.wraps(f)
        def wrapped_f(args):
            if not isinstance(args, _Hashable):
                # uncacheable. a list, for instance.
                # better to not cache than blow up.
                return f(args)
            try:
                # Single lookup; a miss is signalled by KeyError.
                return self.cache[args]
            except KeyError:
                pass
            value = f(args)
            self.cache[args] = value
            return value

        return wrapped_f

    def __repr__(self):
        """Return the decorated function's docstring, or the default repr.

        The default repr is used when nothing has been decorated yet or when
        the decorated function has no docstring.
        """
        doc = getattr(getattr(self, "func", None), "__doc__", None)
        if doc:
            return doc
        return object.__repr__(self)

    def __get__(self, obj, objtype=None):
        """Return the decorator itself when accessed as a class attribute.

        Binding *obj* as the first argument of ``__call__`` (which accepts
        only the function to decorate) would make every later use fail.
        """
        return self
# Make sure the decorator order is not changed: the rate limit works only for
# the inner function, while the memoized function is not rate limited
@mongomemoized("localhost", 27017, "geocoded", "google", "query")
@ratelim.rate_evenly_limited(14000, 86400)
def geocode(text):
    """Geocode *text* through the Google Maps ``maps/geo`` HTTP endpoint.

    Results (including ``None``) are cached by the ``mongomemoized``
    decorator, so a given query reaches the network at most once.

    Args:
        text: the address or place description to look up.

    Returns:
        The ``Placemark`` entry of the JSON response on success, or ``None``
        when the service reports an unknown or unavailable address (602/603).

    Raises:
        AssertionError: for any other non-success status code.
        SystemExit: with status 1 when the key is rejected (610) or the
            request quota is exhausted (620).
        Exception: whatever parsing the response raised, after a diagnostic
            line has been printed.
    """
    url = "http://maps.googleapis.com/maps/geo"
    # NOTE(review): this API key is committed in source; consider loading it
    # from configuration or the environment and rotating it.
    params = {"q": text,
              "key": "AIzaSyCeFJ26MUa7qNYLBUXfXD8kQPWkevqzCK4",
              "output": "json",
              "sensor": "false",
              }
    response = requests.get(url, params=params)
    try:
        # ``json`` was a property in early requests releases and is a method
        # from 1.0 on; accept both.
        payload = response.json
        if callable(payload):
            payload = payload()
        code = payload['Status']['code']
    except Exception:
        print("Error on unpacking the response")
        raise

    if code == G_GEO_SUCCESS:
        return payload['Placemark']

    # Explicit checks instead of ``assert`` (asserts vanish under ``-O``), and
    # ``.get`` so that a status code missing from the table is still reported.
    message = ERR_MESSAGES.get(
        code, "Unrecognized geocoder status code: %r" % (code,))
    print(message)
    if code in (G_GEO_UNKNOWN_ADDRESS, G_GEO_UNAVAILABLE_ADDRESS):
        return None
    if code in (G_GEO_TOO_MANY_QUERIES, G_GEO_BAD_KEY):
        # Retrying cannot help until the key or the quota is fixed.
        sys.exit(1)
    # AssertionError is what the former ``assert`` raised; kept for callers.
    raise AssertionError(message)
I should remove this comment, I love MongoDB with the use of their awesome PyMongo library. I still don't know how I would incorporate it in the Geocoder
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I wouldn't use MongoDB for that purpose; it would be far easier to use SQLite to store those values.
Thanks for the code preview. I'll try to work something out with SQLite; it's a lot easier to manage and requires no setup, since it's part of Python's standard library.