Created
March 22, 2011 15:54
Revisions
-
chrisfarms created this gist
Mar 22, 2011. There are no files selected for viewing
# This file contains bidirectional Unicode text that may be interpreted or
# compiled differently than what appears below. To review, open the file in an
# editor that reveals hidden Unicode characters.
# (Diff hunk: @@ -0,0 +1,66 @@)
from google.appengine.ext import db


def _last_key(entities):
    """Return the key of the last item in a non-empty batch of results."""
    last = entities[-1]
    # If the query was keys only then the "entity" IS already a key and has
    # no key() method to call.
    if hasattr(last, "key"):
        return last.key()
    return last


def each(query, batch_size=100):
    """Yield every entity matched by ``query``, fetching in batches.

    Args:
        query: an object exposing ``fetch(limit)`` and
            ``filter(property_operator, value)``.
        batch_size: maximum number of results requested per fetch.

    Yields:
        Each result of the query in turn (keys, for a keys-only query).

    NOTE(review): the ``__key__ >`` filter is applied to the query object
    that was passed in, so the caller's query is presumably modified once
    more than one batch is needed -- confirm against the db.Query docs.
    NOTE(review): paging by "key greater than the last key seen" assumes
    results come back in ascending key order; this function does not add an
    ordering itself.
    """
    entities = query.fetch(batch_size)
    while entities:
        for entity in entities:
            yield entity
        # Get the next batch: everything after the last key we yielded.
        q = query.filter("__key__ >", _last_key(entities))
        entities = q.fetch(batch_size)


class Mapper(object):
    """Apply ``map()`` to every entity of a kind, writing changes per batch."""

    # Subclasses should replace this with a model class (eg, model.Person).
    KIND = None

    # Subclasses can replace this with a list of (property, value) tuples to
    # filter by. Replace it in the subclass rather than appending to it: this
    # list object is shared by every class that does not override it.
    FILTERS = []

    # Should we map just with keys?
    KEYS_ONLY = False

    def map(self, entity):
        """Update a single entity.

        Implementers should return a tuple containing two iterables
        (to_update, to_delete). The default implementation changes nothing.
        """
        return ([], [])

    def get_query(self):
        """Return a key-ordered query over KIND with FILTERS applied.

        Raises:
            AttributeError: if the subclass has not set KIND.
        """
        if self.KIND is None:
            # Same exception type the bare ``None.all`` lookup would raise,
            # but with a message that says what to fix.
            raise AttributeError(
                "%s.KIND must be set to a model class" % type(self).__name__)
        q = self.KIND.all(keys_only=self.KEYS_ONLY)
        for prop, value in self.FILTERS:
            q.filter("%s =" % prop, value)
        q.order("__key__")
        return q

    def run(self, batch_size=100):
        """Execute the map procedure over all matching entities.

        Entities are fetched ``batch_size`` at a time. After each batch the
        updates and deletes collected from ``map()`` are written with
        ``db.put`` / ``db.delete`` before the next batch is fetched.
        """
        q = self.get_query()
        entities = q.fetch(batch_size)
        while entities:
            to_put = []
            to_delete = []
            for entity in entities:
                map_updates, map_deletes = self.map(entity)
                to_put.extend(map_updates)
                to_delete.extend(map_deletes)
            if to_put:
                db.put(to_put)
            if to_delete:
                db.delete(to_delete)
            # Build a fresh query for every batch so that only one
            # "__key__ >" filter is ever attached to it.
            q = self.get_query()
            q.filter("__key__ >", _last_key(entities))
            entities = q.fetch(batch_size)