Created
March 22, 2011 15:54
-
-
Save chrisfarms/881458 to your computer and use it in GitHub Desktop.
Function to map over all entities in a GAE query
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google.appengine.ext import db | |
def each(query, batch_size=100):
    """Yield every result of ``query``, fetching ``batch_size`` results at a time.

    After a batch has been yielded, the query is narrowed with a
    ``__key__ >`` filter on the key of the last result and fetched again.
    Iteration ends at the first empty batch.

    Args:
        query: an object providing ``fetch(limit)`` and
            ``filter(expression, value)`` -- presumably a ``db.Query``;
            confirm against callers.
        batch_size: maximum number of results requested per fetch.

    Yields:
        Each fetched result, in the order the fetches return them. For a
        keys-only query the results are the keys themselves.

    NOTE(review): this function never orders the query by ``__key__``
    itself; the paging relies on results coming back in ascending key
    order -- confirm that holds for the queries passed in.
    NOTE(review): ``filter`` is called on the query that was passed in, so
    the caller's query object is presumably left narrowed afterwards --
    confirm before reusing it.
    """
    entities = query.fetch(batch_size)
    while entities:
        for entity in entities:
            yield entity
        # A keys-only query returns bare keys, which have no ``key()``
        # accessor; in that case the last result is already the key.
        last = entities[-1]
        key = last.key() if hasattr(last, "key") else last
        # Ask only for results past the last one seen, then continue.
        q = query.filter("__key__ >", key)
        entities = q.fetch(batch_size)
class Mapper(object):
    """Base class for applying ``map`` to every entity of a kind, batch by batch.

    Subclasses set ``KIND`` (and optionally ``FILTERS`` / ``KEYS_ONLY``),
    override ``map``, and call ``run``.
    """

    # Subclasses should replace this with a model class (eg, model.Person).
    KIND = None

    # Subclasses can replace this with a list of (property, value) tuples
    # to filter by. Replace it rather than appending: this list object is
    # shared through the base class.
    FILTERS = []

    # Should we map over keys only? Passed to ``KIND.all(keys_only=...)``.
    KEYS_ONLY = False

    def map(self, entity):
        """Process a single entity (or key, when ``KEYS_ONLY`` is set).

        Implementers should return a tuple containing two iterables
        ``(to_update, to_delete)``. The base implementation returns two
        empty lists, i.e. it changes nothing.
        """
        return ([], [])

    def get_query(self):
        """Return a fresh query over ``KIND`` with ``FILTERS`` applied.

        Each ``(property, value)`` pair in ``FILTERS`` becomes an equality
        filter, and the query is ordered by ``__key__``.
        """
        q = self.KIND.all(keys_only=self.KEYS_ONLY)
        for prop, value in self.FILTERS:
            q.filter("%s =" % prop, value)
        q.order("__key__")
        return q

    def run(self, batch_size=100):
        """Execute the map procedure over all matching entities.

        Fetches ``batch_size`` results at a time, calls ``map`` on each,
        then writes the collected updates with ``db.put`` and removes the
        collected deletions with ``db.delete`` once per batch. The next
        batch is fetched from a fresh query filtered to keys greater than
        the last one processed. Stops at the first empty batch.
        """
        q = self.get_query()
        entities = q.fetch(batch_size)
        while entities:
            to_put = []
            to_delete = []
            for entity in entities:
                map_updates, map_deletes = self.map(entity)
                to_put.extend(map_updates)
                to_delete.extend(map_deletes)
            # Skip the datastore calls entirely when there is nothing to do.
            if to_put:
                db.put(to_put)
            if to_delete:
                db.delete(to_delete)
            # A keys-only query returns bare keys, which have no ``key()``
            # accessor; in that case the last result is already the key.
            last = entities[-1]
            key = last.key() if hasattr(last, "key") else last
            # Rebuild the query and resume just past the last key seen.
            q = self.get_query()
            q.filter("__key__ >", key)
            entities = q.fetch(batch_size)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment