@kolen
Last active November 11, 2020 18:43
Google App Engine backup dump to JSON
# Python 2 script: the bundled App Engine SDK modules used below are Python 2 only.
import sys
import json
import datetime

# Path to the App Engine SDK bundled with the Google Cloud SDK (Homebrew cask install).
sys.path.append('/usr/local/Caskroom/google-cloud-sdk/latest/google-cloud-sdk/platform/google_appengine/')

from google.appengine.api.files import records
from google.appengine.datastore import entity_pb
from google.appengine.api import datastore


def extract(index):
    """Read one leveldb-format backup file and return its entities as dicts."""
    raw = open('backups/2020-11-10T18:54:56_95621/default_namespace/all_kinds/output-' + str(index), 'r')
    reader = records.RecordsReader(raw)
    result = []
    for record in reader:
        entity_proto = entity_pb.EntityProto(contents=record)
        entity = datastore.Entity.FromPb(entity_proto)
        if entity.kind().startswith('foo_'):
            rec = dict(entity)
            # https://cloud.google.com/appengine/docs/standard/python/datastore/keyclass
            rec['id'] = entity.key().id_or_name()
            rec['kind'] = entity.kind()
            result.append(rec)
    return result


def extract_all():
    # Put your number of output-* files here.
    return [entry for i in range(0, 23) for entry in extract(i)]


class DumpEncoder(json.JSONEncoder):
    """JSON encoder that serializes datetime values as ISO 8601 strings."""
    def default(self, obj):
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return json.JSONEncoder.default(self, obj)


print json.dumps(extract_all(), cls=DumpEncoder)
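
The number of output-* files in extract_all is hard-coded as range(0, 23). A minimal sketch of deriving it from the files actually present instead, assuming the same backup directory used in extract above (BACKUP_DIR here is an assumed name, not part of the original script):

import glob
import os.path

BACKUP_DIR = 'backups/2020-11-10T18:54:56_95621/default_namespace/all_kinds'


def extract_all():
    # Count the output-* files rather than hard-coding their number.
    count = len(glob.glob(os.path.join(BACKUP_DIR, 'output-*')))
    return [entry for i in range(count) for entry in extract(i)]

The script writes the JSON array to stdout, so the usual way to use it is to redirect the output to a file (the script filename below is just an example), e.g. python2 dump_to_json.py > dump.json.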