Last active
August 29, 2015 14:03
-
-
Save miratcan/40ec6a75940ea358e2a6 to your computer and use it in GitHub Desktop.
Dump users and their related data as JSON.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from django.db.models import (get_models, ManyToManyField, ForeignKey, Count) | |
from django.contrib.contenttypes.generic import GenericRelation | |
from django.core import serializers | |
from django.contrib.auth.models import User | |
from sets import Set | |
def foreign_keys_to(model_class):
    """Map models to the fields through which they point at ``model_class``.

    For each model returned by ``get_models()``, looks at ``_meta.fields``
    plus ``_meta.many_to_many`` and keeps the fields whose ``rel.to`` is
    exactly ``model_class``.

    Returns a dict ``{model: [field, ...]}`` that only contains models with
    at least one matching field.
    """
    referrers = {}
    for candidate in get_models():
        meta = candidate._meta
        for fld in meta.fields + meta.many_to_many:
            # Fields without a ``rel`` attribute cannot reference anything.
            if not hasattr(fld, 'rel'):
                continue
            relation = fld.rel
            if relation and relation.to is model_class:
                referrers.setdefault(candidate, []).append(fld)
    return referrers
def foreign_keys_from(model_class):
    """Group the relational fields of ``model_class`` by the model they target.

    Walks ``model_class._meta.fields`` and keeps every field whose ``rel``
    attribute is truthy. A field that has no ``rel`` attribute at all is
    skipped instead of raising ``AttributeError``, in line with the
    ``hasattr`` guard used by ``foreign_keys_to``.

    Returns a dict ``{target_model: [field, ...]}`` keyed by ``field.rel.to``.
    """
    models = {}
    for field in model_class._meta.fields:
        # The default of None makes "no rel attribute" behave like "rel is
        # None".
        rel = getattr(field, 'rel', None)
        if not rel:
            continue
        models.setdefault(rel.to, []).append(field)
    return models
def dprint(text, depth, color=''):
    """Print ``text`` preceded by ``depth`` copies of the indent string.

    ``text`` is converted with ``str()`` first. ``color`` is accepted but
    not used.
    """
    indent = ' ' * depth
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print(indent + '%s' % (str(text)))
def cleaned(data, last_record=False):
    """Strip the outer delimiters from a serialized list so chunks can be joined.

    Drops the first and last character of ``data``, removes one space after
    every newline, and appends ``', '`` unless ``last_record`` is true.
    """
    inner = data[1:-1].replace('\n ', '\n')
    if last_record:
        return '%s' % inner
    return '%s, ' % inner
def write_queryset(queryset, stream, depth=0, last_record=False):
    """Serialize ``queryset`` as JSON and append the result to ``stream``.

    Nothing is written when ``queryset.count()`` is zero. Otherwise the
    serialized text is passed through ``cleaned`` (which receives
    ``last_record``) before being written, and a one-line summary is printed
    via ``dprint`` at the given ``depth``.
    """
    if queryset.count():
        serialized = serializers.serialize('json', queryset, indent=2)
        stream.write(cleaned(serialized, last_record=last_record))
        summary = '%s %s records written' % (len(queryset),
                                             queryset.model.__name__)
        dprint(summary, depth=depth)
def _dump(model_class, ids_to_keep, stream, depth=0, last_record=True,
          dumped=None):
    """Write ``model_class`` rows and the rows related to them to ``stream``.

    The models that ``model_class`` points at are dumped first, then every
    model that points at ``model_class`` (recursively, one level deeper),
    and finally the ``model_class`` rows themselves.

    Parameters:
        model_class: model whose rows are being dumped.
        ids_to_keep: ids used in the ``id__in`` filter on ``model_class``.
        stream: object with a ``write`` method that receives the output.
        depth: recursion depth; at depth 0 the output is wrapped in ``[`` and
            ``]``. Also used to indent progress messages.
        last_record: forwarded to ``write_queryset`` for the final write of
            ``model_class`` rows.
        dumped: dict mapping a model to the ``Set`` of ids already handled.
            A fresh dict is created when omitted.
    """
    # A mutable default ({}) would be shared by every top-level call, so ids
    # remembered while producing one dump would be skipped in the next one.
    if dumped is None:
        dumped = {}

    def dump_foreignkeys_of(model_class, ids_to_keep, stream, depth=0,
                            last_record=False):
        """Dump the models referenced by ``model_class``, then its own rows.

        Example: for model_class Profile with a ``user`` field pointing at
        User, the ``user_id`` values of the selected Profile rows are
        collected and the helper recurses into User with those ids.
        """
        related_models = foreign_keys_from(model_class)
        for child_model, fields in related_models.items():
            for field in fields:
                field_key = '%s_id' % field.name
                raw_ids = Set([p[field_key] for p in model_class.objects
                               .values(field_key).filter(id__in=ids_to_keep)])
                already_dumped_ids = dumped.setdefault(child_model, Set())
                # NOTE(review): this rebinds the helper's own ``ids_to_keep``
                # parameter, so the following iterations and the write below
                # filter on these child ids rather than on the ids passed
                # in. Kept as-is; confirm whether that is intended.
                ids_to_keep = list(set(raw_ids) - set(already_dumped_ids))
                if ids_to_keep:
                    dump_foreignkeys_of(child_model, ids_to_keep, stream,
                                        depth=depth+1)
        write_queryset(model_class.objects.filter(id__in=ids_to_keep), stream,
                       depth=depth, last_record=last_record)
        if model_class not in dumped:
            dumped[model_class] = Set()
        dumped[model_class] = dumped[model_class].union(Set(ids_to_keep))

    if depth == 0:
        stream.write('[')
    dump_foreignkeys_of(model_class, ids_to_keep, stream)
    parent_models = foreign_keys_to(model_class)
    for parent_model, fields in parent_models.items():
        for field in fields:
            # Build the lookup that selects parent rows pointing at the kept
            # ids; other field types are ignored.
            if isinstance(field, ForeignKey):
                query_key = '%s_id__in'
            elif isinstance(field, ManyToManyField) or \
                    isinstance(field, GenericRelation):
                query_key = '%s__id__in'
            else:
                continue
            raw_ids = Set(
                [i['id'] for i in parent_model.objects.values('id')
                 .filter(**{query_key % field.name: ids_to_keep})])
            already_dumped_ids = dumped.setdefault(parent_model, Set())
            parent_ids_to_keep = list(set(raw_ids) - set(already_dumped_ids))
            if parent_ids_to_keep:
                dprint('%s %s objects whose %s field is '
                       'in given %s objects.' % (parent_model.__name__,
                                                 len(parent_ids_to_keep),
                                                 field.name,
                                                 parent_model.__name__),
                       depth=depth)
                _dump(parent_model, parent_ids_to_keep, stream,
                      depth=depth+1, last_record=False, dumped=dumped)
    write_queryset(model_class.objects.filter(id__in=ids_to_keep), stream,
                   last_record=last_record)
    if depth == 0:
        stream.write(']')
def dump(model_class, ids_to_keep, file_name):
    """Dump the selected ``model_class`` rows and related rows into a file.

    Opens ``file_name`` in ``'w'`` mode and hands the open file to ``_dump``
    together with ``model_class`` and ``ids_to_keep``.
    """
    with open(file_name, 'w') as out:
        _dump(model_class, ids_to_keep, out)
# Script entry: select at most 100 users whose order count is between 100 and
# 110 (inclusive) and dump them together with their related data.
print("Calculating user_ids_to_keep...")
user_ids_to_keep = (User.objects
                    .values('id')
                    .annotate(num_of_orders=Count('order')))
user_ids_to_keep = user_ids_to_keep.filter(
    num_of_orders__gte=100, num_of_orders__lte=110)[:100]
user_ids_to_keep = [u['id'] for u in user_ids_to_keep]
dump(User, user_ids_to_keep, 'dumps/users_and_related_data.json')

# Disabled alternative that dumps a single user (kept as an inert string).
"""
user_ids_to_keep = [u.id for u in User.objects.all()[0:1]]
dump(User, user_ids_to_keep, 'dumps/test.json')
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment