Skip to content

Instantly share code, notes, and snippets.

@miratcan
Last active August 29, 2015 14:03
Show Gist options
  • Save miratcan/40ec6a75940ea358e2a6 to your computer and use it in GitHub Desktop.
Save miratcan/40ec6a75940ea358e2a6 to your computer and use it in GitHub Desktop.
Dump users and their related data as JSON.
from django.db.models import (get_models, ManyToManyField, ForeignKey, Count)
from django.contrib.contenttypes.generic import GenericRelation
from django.core import serializers
from django.contrib.auth.models import User
from sets import Set
def foreign_keys_to(model_class):
    """Find the relational fields on any model that point at ``model_class``.

    Walks every model returned by ``get_models()`` and inspects both its
    regular fields and its many-to-many fields.

    Returns a dict mapping each referring model to the list of its fields
    whose ``rel.to`` is exactly ``model_class`` (identity comparison).
    """
    referrers = {}
    for candidate in get_models():
        meta = candidate._meta
        for field in meta.fields + meta.many_to_many:
            # Not every field object carries relation metadata.
            if not hasattr(field, 'rel'):
                continue
            relation = field.rel
            if relation and relation.to is model_class:
                referrers.setdefault(candidate, []).append(field)
    return referrers
def foreign_keys_from(model_class):
    """Group the relational fields declared on ``model_class`` by target model.

    Only ``model_class._meta.fields`` is inspected (many-to-many fields are
    not included here).

    Returns a dict mapping each target model (``field.rel.to``) to the list
    of fields on ``model_class`` that point at it, in declaration order.
    """
    models = {}
    for field in model_class._meta.fields:
        # A default is required here: without it, a field object lacking a
        # ``rel`` attribute raises AttributeError instead of being skipped
        # (foreign_keys_to guards the same case with hasattr).
        rel = getattr(field, 'rel', None)
        if not rel:
            continue
        models.setdefault(rel.to, []).append(field)
    return models
def dprint(text, depth, color=''):
    """Print ``text`` indented by one space per ``depth`` level.

    ``text`` is converted with ``str()``.  ``color`` is accepted for
    interface compatibility but is not used.
    """
    # Single-argument call form: prints the same on Python 2 (where the
    # parentheses just group the expression) and is valid on Python 3.
    print(' ' * depth + str(text))
def cleaned(data, last_record=False):
    """Turn one serialized JSON list into a fragment of a larger list.

    The first and last characters of ``data`` (the enclosing brackets of
    the serializer output) are dropped and every newline-plus-space is
    collapsed to a bare newline.  Unless ``last_record`` is true, a
    ``', '`` separator is appended so another fragment can follow.
    """
    fragment = data[1:-1].replace('\n ', '\n')
    if last_record:
        return '%s' % fragment
    return '%s, ' % fragment
def write_queryset(queryset, stream, depth=0, last_record=False):
    """Serialize ``queryset`` as JSON and append it to ``stream``.

    Nothing is written (and nothing is logged) when the queryset is empty.
    Otherwise the serializer output is passed through ``cleaned`` so it can
    be embedded in a larger JSON list, and a one-line summary is printed at
    the given ``depth``.  ``last_record`` is forwarded to ``cleaned`` and
    controls whether a trailing separator is emitted.
    """
    if queryset.count():
        serialized = serializers.serialize('json', queryset, indent=2)
        chunk = cleaned(serialized, last_record=last_record)
        stream.write(chunk)
        summary = '%s %s records written' % (
            len(queryset), queryset.model.__name__)
        dprint(summary, depth=depth)
def _dump(model_class, ids_to_keep, stream, depth=0, last_record=True,
          dumped=None):
    """Recursively write ``model_class`` rows and the rows related to them.

    Order of work for one call:

    1. At ``depth == 0`` an opening ``[`` is written.
    2. The rows that the kept ``model_class`` rows point at through their
       own relational fields are written first, followed by the kept rows
       themselves (see ``dump_foreignkeys_of``).
    3. For every model that has a ForeignKey, ManyToManyField or
       GenericRelation to ``model_class``, the not-yet-dumped rows that
       reference the kept ids are collected and dumped by recursing.
    4. The kept ``model_class`` rows are written once more with the caller's
       ``last_record`` flag, and at ``depth == 0`` a closing ``]`` follows.
       NOTE(review): this second write duplicates the rows from step 2; it
       looks like it exists so the top-level call ends with a chunk that
       has no trailing separator -- confirm before removing.

    Parameters:
        model_class: model whose rows are being dumped.
        ids_to_keep: primary keys of the ``model_class`` rows to dump.
        stream: writable object receiving the JSON text.
        depth: recursion depth, used for log indentation and to decide
            whether the enclosing brackets are written.
        last_record: forwarded to the final ``write_queryset`` call.
        dumped: dict mapping a model to the set of ids already written for
            it; shared by the whole recursion.  A fresh dict is created
            when omitted.
    """
    if dumped is None:
        # Must not be a ``{}`` default: that single dict would be shared by
        # every top-level call in the process, so a second dump would
        # silently skip everything the first one wrote.
        dumped = {}

    def dump_foreignkeys_of(model_class, ids_to_keep, stream, depth=0,
                            last_record=False):
        """Write the rows ``model_class`` points at, then its own rows."""
        related_models = foreign_keys_from(model_class)
        for child_model, fields in related_models.items():
            # Example: model_class=Profile, child_model=User, field=user.
            # Collect the user_id values of the kept profiles and dump
            # those users before the profiles themselves.
            for field in fields:
                field_key = '%s_id' % field.name
                rows = model_class.objects.values(field_key).filter(
                    id__in=ids_to_keep)
                # NULL foreign keys reference nothing, so skip them.
                raw_ids = set(row[field_key] for row in rows
                              if row[field_key] is not None)
                already_dumped_ids = dumped.setdefault(child_model, set())
                # Kept in its own name: rebinding ``ids_to_keep`` here would
                # make the remaining fields, the write below and the
                # bookkeeping operate on the child model's ids.
                child_ids_to_keep = list(raw_ids - set(already_dumped_ids))
                if child_ids_to_keep:
                    dump_foreignkeys_of(child_model, child_ids_to_keep,
                                        stream, depth=depth+1)
        write_queryset(model_class.objects.filter(id__in=ids_to_keep), stream,
                       depth=depth, last_record=last_record)
        dumped[model_class] = dumped.setdefault(
            model_class, set()).union(ids_to_keep)

    if depth == 0:
        stream.write('[')
    dump_foreignkeys_of(model_class, ids_to_keep, stream)
    parent_models = foreign_keys_to(model_class)
    for parent_model, fields in parent_models.items():
        for field in fields:
            # Pick the lookup that matches parent rows by the kept ids.
            if isinstance(field, ForeignKey):
                query_key = '%s_id__in'
            elif isinstance(field, (ManyToManyField, GenericRelation)):
                query_key = '%s__id__in'
            else:
                continue
            rows = parent_model.objects.values('id').filter(
                **{query_key % field.name: ids_to_keep})
            raw_ids = set(row['id'] for row in rows)
            already_dumped_ids = dumped.setdefault(parent_model, set())
            parent_ids_to_keep = list(raw_ids - set(already_dumped_ids))
            if parent_ids_to_keep:
                # The "given" objects are the model_class rows being kept,
                # not the parent rows, so name model_class at the end.
                dprint('%s %s objects whose %s field is '
                       'in given %s objects.' % (parent_model.__name__,
                                                 len(parent_ids_to_keep),
                                                 field.name,
                                                 model_class.__name__),
                       depth=depth)
                _dump(parent_model, parent_ids_to_keep, stream,
                      depth=depth+1, last_record=False, dumped=dumped)
    write_queryset(model_class.objects.filter(id__in=ids_to_keep), stream,
                   last_record=last_record)
    if depth == 0:
        stream.write(']')
def dump(model_class, ids_to_keep, file_name):
    """Dump the given ``model_class`` rows and their related data to a file.

    ``file_name`` is opened for writing (truncating any existing content)
    and handed to ``_dump`` together with the model and the ids to keep.
    """
    with open(file_name, 'w') as out:
        _dump(model_class, ids_to_keep, out)
# Script body: select at most 100 users whose order count is between 100 and
# 110 (inclusive) and dump them with their related data.
# NOTE(review): Count('order') assumes User has a reverse relation named
# ``order`` -- confirm against the project's models.
# Single-argument call form of print: same output on Python 2, valid on 3.
print("Calculating user_ids_to_keep...")
user_ids_to_keep = User.objects.values('id').annotate(
    num_of_orders=Count('order'))
user_ids_to_keep = [u['id'] for u in user_ids_to_keep.filter(
    num_of_orders__gte=100, num_of_orders__lte=110)[:100]]
dump(User, user_ids_to_keep, 'dumps/users_and_related_data.json')
# Alternative invocation for a quick single-user run (disabled):
# user_ids_to_keep = [u.id for u in User.objects.all()[0:1]]
# dump(User, user_ids_to_keep, 'dumps/test.json')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment