-
-
Save mmalone/351429 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random
import time
from eventlet import patcher, greenpool, pools
# monkey_patch() is deliberately invoked before ``simplegeo`` is imported;
# presumably so that the client's blocking network calls become cooperative
# under eventlet -- confirm against the eventlet patcher documentation.
patcher.monkey_patch()
import simplegeo
# Identifier of the SimpleGeo layer that receives the records; it is passed to
# RandomDataInserter in the ``__main__`` section. Placeholder: replace it.
LAYER = 'your.layer.identifier.here'
# OAuth credentials handed to ``simplegeo.Client`` by SimpleGeoPool.create().
# Both values are placeholders and must be replaced before running.
OAUTH_KEY = 'YOUR OAUTH KEY HERE'
OAUTH_SECRET = 'YOUR OAUTH SECRET HERE'
class SimpleGeoPool(pools.Pool):
    """
    An eventlet pool whose items are ``simplegeo.Client`` instances, so that
    cooperating green threads can share a bounded set of API clients.
    """

    def create(self):
        """
        Build one new pool item: a client authenticated with the module-level
        ``OAUTH_KEY`` / ``OAUTH_SECRET`` credentials.
        """
        credentials = {'key': OAUTH_KEY, 'secret': OAUTH_SECRET}
        return simplegeo.Client(**credentials)
class RandomDataInserter(object):
    """
    Inserts randomly positioned records into a SimpleGeo layer, sending the
    API requests concurrently from a pool of green threads.

    Parameters:

    ``layer``:
        Identifier of the layer that receives the records.
    ``pool_size``:
        Size of both the green thread pool and the API client pool.
    ``lat_min``, ``lat_max``, ``lon_min``, ``lon_max``:
        Bounding box from which random coordinates are drawn. Going by the
        coordinates, the defaults look like a small box in San Francisco.

    Attributes:

    ``current_id``:
        The next record id to hand out; ids are consecutive integers
        starting at 0.
    ``requests_made``:
        Number of ``add_records`` API calls that have completed.
    """

    def __init__(self, layer, pool_size=500, lat_min=37.740262,
                 lat_max=37.80975, lon_min=-122.461274, lon_max=-122.40599):
        self.layer = layer
        self.pool_size = pool_size
        self.lat_min = lat_min
        self.lat_max = lat_max
        self.lon_min = lon_min
        self.lon_max = lon_max
        self.current_id = 0
        self.requests_made = 0
        self.pool = greenpool.GreenPool(pool_size)
        self.clients = SimpleGeoPool(max_size=pool_size)

    def insert_random_data(self, batch_size):
        """
        Creates ``batch_size`` records with consecutive ids and random
        coordinates inside the bounding box, then sends them to the SimpleGeo
        API in a single ``add_records`` call.

        Nothing is sent (and no ids are consumed) when ``batch_size`` is not
        positive.
        """
        if batch_size <= 0:
            return

        # Reserve the whole id range before building anything, so the ids of
        # this batch never depend on when other green threads get to run.
        starting_id = self.current_id
        self.current_id += batch_size

        # First build the list of records
        records = [
            simplegeo.Record(
                layer=self.layer,
                id=str(record_id),
                lat=random.uniform(self.lat_min, self.lat_max),
                lon=random.uniform(self.lon_min, self.lon_max),
            )
            for record_id in range(starting_id, starting_id + batch_size)
        ]

        # Now insert that via the SimpleGeo API
        with self.clients.item() as client:
            client.add_records(self.layer, records)
            # Counted only after the call returns, so a request that raises
            # is not included.
            self.requests_made += 1

        # The last id in the batch is starting_id + batch_size - 1. The
        # parenthesised single-argument form prints the same text under the
        # Python 2 print statement.
        print('Inserted record ids %d through %d' % (
            starting_id,
            starting_id + batch_size - 1,
        ))

    def destroy_cassandra(self, batch_size, num):
        """
        Kicks off the insertion process, hopefully killing their Cassandra
        cluster and showing those NoSQL weenies that they should just add an
        index.

        Parameters:

        ``batch_size``:
            The number of records to send per API call. Must be positive;
            otherwise ``ValueError`` is raised.
        ``num``:
            The total number of records to insert.
        """
        if batch_size <= 0:
            # A non-positive batch size would never advance the loop below.
            raise ValueError('batch_size must be a positive integer')

        created = 0
        while created < num:
            # Trim the final batch so that exactly ``num`` records are
            # inserted even when ``num`` is not a multiple of ``batch_size``.
            size = min(batch_size, num - created)
            self.pool.spawn_n(self.insert_random_data, size)
            created += size
        self.pool.waitall()
if __name__ == '__main__':
    # Time one complete run: 100000 records sent in batches of 100, then
    # report the wall-clock duration, the throughput and the request count.
    total_records = 100000
    t0 = time.time()
    inserter = RandomDataInserter(LAYER)
    inserter.destroy_cassandra(100, total_records)
    elapsed = time.time() - t0

    throughput = total_records / elapsed
    summary = 'Took %s seconds to insert %d records (%s per second)' % (
        elapsed,
        total_records,
        throughput,
    )
    print(summary)
    print('Used %s API requests' % (inserter.requests_made,))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment