Skip to content

Instantly share code, notes, and snippets.

@jxnl
Last active August 29, 2015 14:16
Show Gist options
  • Save jxnl/b089ee641c3c0027eadc to your computer and use it in GitHub Desktop.
Save jxnl/b089ee641c3c0027eadc to your computer and use it in GitHub Desktop.
A Yelp search request returns only min(n, 20) results for a given geo-query. This script recursively quadrisects the bounding box until each box contains at most 20 results, and prints the JSON output of each response.
from __future__ import print_function
from Queue import Queue
import json
def subdivide(posn1, posn2):
    """
    Split the box spanned by two corner points into four equal quadrants.

    Args:
        posn1 - south-west coord, a 2-tuple of numbers
        posn2 - north-east coord, a 2-tuple of numbers
    Yields:
        posn - (SW, NE) tuples, one per quadrant; the midpoint values are
               always floats, the outer corner values are passed through
               unchanged.
    """
    a, b = posn1
    c, d = posn2
    # Midpoint of each axis. Dividing by the float literal keeps this true
    # division even for integer inputs on Python 2.
    ac = (a + c) / 2.
    bd = (b + d) / 2.
    # lower half of the first coordinate, upper half of the second
    yield ((a, bd), (ac, d))
    # upper half of both coordinates
    yield ((ac, bd), (c, d))
    # lower half of both coordinates
    yield ((a, b), (ac, bd))
    # upper half of the first coordinate, lower half of the second
    yield ((ac, b), (c, bd))
def recurse_divide(posn1, posn2, n, queue):
    """
    Populates queue with the 4^n subdivisions of a box defined by posn1, posn2

    Args:
        posn1 - south-west coord
        posn2 - north-east coord
        n - number of new quadrant divisions; for n <= 0 the box itself is
            enqueued undivided
        queue - container for new divisions; only its put() method is used
    Returns:
        None. Results are delivered through queue as (SW, NE) tuples.
    """
    if n <= 0:
        # Base case: nothing left to divide. An explicit bound here also
        # stops the recursion for n <= 0, which a plain truthiness test on
        # n - 1 would never do.
        queue.put((posn1, posn2))
        return None
    for sub_sw, sub_ne in subdivide(posn1, posn2):
        recurse_divide(sub_sw, sub_ne, n - 1, queue)
    return None
def crawl(queue):
    """
    Crawl a queue of box coordinates, yielding every non-empty response.

    Each (SW, NE) box taken from the queue is requested through the
    module-level yelp_search session and the body is parsed as JSON. A box
    reporting more than 20 matches is split into four sub-boxes that are
    appended to the same queue, so the loop keeps running until no box is
    left.

    Args:
        queue - Queue() of (SW, NE) coordinate tuples; it is drained and may
                be refilled with sub-boxes while crawling
    Yields:
        the parsed JSON response of every box whose 'total' is above zero
    """
    temp = "http://api.yelp.com/v2/search?bounds={},{}"
    while not queue.empty():
        p1, p2 = queue.get()
        url = temp.format(*p1) + "|" + "{},{}".format(*p2)
        content = yelp_search.get(url).content
        response = json.loads(content)
        try:
            total = response[u'total']
        except (KeyError, TypeError):
            # No 'total' field (or the payload is not a JSON object):
            # treat the box as empty.
            total = 0
        if total > 20:
            # NOTE(review): the original compared total against an undefined
            # name `cur`, which raised NameError on this path. It is taken
            # here to mean the number of results carried by the current
            # response, presumably under the 'businesses' key -- confirm.
            returned = len(response.get(u'businesses') or ())
            if total != returned:
                recurse_divide(p1, p2, 1, queue)
        if total > 0:
            yield response
# Search area: a bounding box in Toronto, given as its two opposite corners.
posn1 = (43.601887, -79.409602)  # south-west corner
posn2 = (43.693345, -79.350158)  # north-east corner

# Authenticated session used by crawl(). The credentials are elided here.
# NOTE(review): OAuth1Session is not imported in the visible part of this
# file -- confirm where it comes from.
yelp_search = OAuth1Session(...)

# Seed the work queue with the 16 boxes of a two-level subdivision.
box_queue = Queue()
recurse_divide(posn1, posn2, 2, box_queue)

# Drain the queue, writing one JSON document per line to stdout.
for response in crawl(box_queue):
    print(json.dumps(response))
@jxnl
Copy link
Author

jxnl commented Aug 13, 2015

rekt'd yelp.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment