Skip to content

Instantly share code, notes, and snippets.

@chrisfarms
Created March 22, 2011 15:54

Revisions

  1. chrisfarms created this gist Mar 22, 2011.
    66 changes: 66 additions & 0 deletions each.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,66 @@
    from google.appengine.ext import db

    def each(query, batch_size=100):
        """Yield every result of ``query``, fetching ``batch_size`` results at a time.

        After a batch has been yielded, a ``__key__ >`` filter built from the
        last result of that batch is passed to ``query.filter`` and the next
        batch is fetched from whatever that call returns.  Iteration stops at
        the first empty batch.

        Works with keys-only queries as well: a result that has no ``key``
        attribute is treated as being the key itself.

        NOTE(review): nothing here applies a sort order, so this presumably
        relies on the query returning results in key order -- confirm
        against callers.
        """
        batch = query.fetch(batch_size)
        while batch:
            for item in batch:
                yield item
            last = batch[-1]
            # Keys-only results ARE keys; full entities expose theirs via key().
            resume_after = last.key() if hasattr(last, "key") else last
            # Continue strictly after the last result already handed out.
            batch = query.filter("__key__ >", resume_after).fetch(batch_size)

    class Mapper(object):
        """Base class for jobs that visit every matching entity of one kind in batches.

        Subclasses set ``KIND`` (and optionally ``FILTERS`` / ``KEYS_ONLY``),
        override ``map`` and then call ``run``.
        """

        # Model class to query (eg, model.Person); subclasses are expected to set it.
        KIND = None

        # Optional list of (property, value) tuples; each becomes an equality filter.
        FILTERS = []

        # When True the query is created with keys_only=True, so results are keys.
        KEYS_ONLY = False

        def map(self, entity):
            """Handle a single query result.

            Implementers should return a tuple of two iterables,
            ``(to_update, to_delete)``.  This base implementation returns two
            empty lists.
            """
            return [], []

        def get_query(self):
            """Build a new query over ``KIND`` with ``FILTERS`` applied, ordered by ``__key__``."""
            query = self.KIND.all(keys_only=self.KEYS_ONLY)
            for name, wanted in self.FILTERS:
                query.filter("%s =" % name, wanted)
            query.order("__key__")
            return query

        def run(self, batch_size=100):
            """Run ``map`` over all matching results, ``batch_size`` at a time.

            For each batch, everything ``map`` asked to update is passed to
            ``db.put`` and everything it asked to delete to ``db.delete``
            (each call is skipped when its list is empty).  The next batch
            comes from a fresh query restricted to keys greater than the last
            result of the current batch; the loop ends on an empty batch.
            """
            batch = self.get_query().fetch(batch_size)
            while batch:
                pending_puts, pending_deletes = [], []
                for item in batch:
                    updates, deletes = self.map(item)
                    pending_puts.extend(updates)
                    pending_deletes.extend(deletes)
                if pending_puts:
                    db.put(pending_puts)
                if pending_deletes:
                    db.delete(pending_deletes)
                # Build a fresh query for the next page rather than reusing the old one.
                next_query = self.get_query()
                last = batch[-1]
                # Keys-only results ARE keys; full entities expose theirs via key().
                resume_after = last.key() if hasattr(last, "key") else last
                next_query.filter("__key__ >", resume_after)
                batch = next_query.fetch(batch_size)