Last active
August 29, 2015 14:16
-
-
Save jxnl/b089ee641c3c0027eadc to your computer and use it in GitHub Desktop.
A Yelp search request returns only min(n, 20) results for a given geo-query. This script recursively quadrisects the bounding box until each box contains at most 20 results, and prints the JSON output of each response.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import print_function | |
| from Queue import Queue | |
| import json | |
def subdivide(posn1, posn2):
    """
    Split a bounding box into its four equal quadrants.

    Args:
        posn1 - south-west coord, a 2-tuple
        posn2 - north-east coord, a 2-tuple
    Yields:
        posn - (SW, NE) tuple pairs, one per quadrant; each corner keeps
               the same component order as the inputs
    """
    a, b = posn1
    c, d = posn2
    # Midpoint of each axis. Dividing by 2.0 keeps the result a float even
    # for integer inputs under Python 2's integer division.
    ac = (a + c) / 2.0
    bd = (b + d) / 2.0
    # Quadrants in the upper half of the second component's range.
    yield ((a, bd), (ac, d))
    yield ((ac, bd), (c, d))
    # Quadrants in the lower half of the second component's range.
    yield ((a, b), (ac, bd))
    yield ((ac, b), (c, bd))
def recurse_divide(posn1, posn2, n, queue):
    """
    Populates queue with the 4^n subdivisions of a box defined by posn1, posn2

    Args:
        posn1 - south-west coord
        posn2 - north-east coord
        n - number of new quadrant divisions; n <= 0 enqueues the box
            itself without dividing it
        queue - container for new divisions; only its put() method is used
    Returns:
        None; results are delivered through queue as (SW, NE) tuples
    """
    # Base case: no divisions left, so the box itself is the result. Testing
    # n <= 0 (rather than the truthiness of n - 1) guarantees termination
    # for zero and negative n.
    if n <= 0:
        queue.put((posn1, posn2))
        return None
    for sub_sw, sub_ne in subdivide(posn1, posn2):
        recurse_divide(sub_sw, sub_ne, n - 1, queue)
    return None
def crawl(queue):
    """
    Crawl a queue of box coordinates, yielding each non-empty search response.

    Every box taken from the queue is sent to the search endpoint through
    the module-level yelp_search session. When the reported total exceeds
    20, the box is split into its four quadrants, which are appended to the
    same queue so they are crawled later in this loop.

    Args:
        queue - Queue() of (SW, NE) coordinate pairs; it is consumed and may
                be extended while crawling
    Yields:
        response - decoded JSON response for every box whose total is > 0
    """
    temp = "http://api.yelp.com/v2/search?bounds={},{}"
    # Total reported for the parent of each box this function enqueued.
    # NOTE(review): the original compared against an undefined name `cur`;
    # it is interpreted here as the parent box's total, so a quadrant that
    # still reports exactly the parent's total is not split again.
    parent_totals = {}
    while not queue.empty():
        p1, p2 = queue.get()
        # Boxes seeded by the caller have no recorded parent -> None.
        parent_total = parent_totals.pop((tuple(p1), tuple(p2)), None)
        url = temp.format(*p1) + "|" + "{},{}".format(*p2)
        content = yelp_search.get(url).content
        response = json.loads(content)
        try:
            total = response['total']
        except (KeyError, TypeError):
            # No 'total' field (or the payload is not a mapping): treat the
            # box as empty.
            total = 0
        if total > 20 and total != parent_total:
            # One level of subdivision, remembering the parent's total for
            # each child box.
            for child in subdivide(p1, p2):
                parent_totals[child] = total
                queue.put(child)
        if total > 0:
            yield response
# Emily's job setup.
# Bounding box over Toronto: south-west and north-east corners.
posn1 = (43.601887, -79.409602)
posn2 = (43.693345, -79.350158)

# Yelp session used by crawl(); the constructor arguments are elided here.
yelp_search = OAuth1Session(...)

# Initialize the queue and fill it with the 16 boxes from two subdivisions.
box_queue = Queue()
recurse_divide(posn1, posn2, 2, box_queue)

# Crawl, printing each response as one line of JSON.
for response in crawl(box_queue):
    print(json.dumps(response))
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
rekt'd yelp.