Created
July 4, 2016 17:11
-
-
Save un1t/536f4391b6091097aae5e40b57e61f68 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gc | |
from more_itertools import chunked | |
from collections import defaultdict | |
def fetch_fk(fk_field, fields, objects):
    """Attach the related row of a foreign key to every dict in ``objects``.

    One query is issued for all referenced rows, and each entry of
    ``objects`` gains a key named after the field (``fk_field.field.name``)
    holding the related row as a dict, or ``None`` when the foreign key is
    null or no matching row was returned.

    Args:
        fk_field: object exposing ``field.column``, ``field.name`` and
            ``get_queryset()`` (presumably a Django forward FK descriptor
            such as ``Model.author`` -- confirm against callers).
        fields: names passed to ``.values()`` on the related queryset;
            must contain ``'id'`` because rows are matched on that key.
        objects: sequence of dicts, each carrying the FK column value under
            ``fk_field.field.column``. Mutated in place.

    Returns:
        None. The result is written into ``objects``.
    """
    assert 'id' in fields
    if not objects:
        return

    column = fk_field.field.column
    name = fk_field.field.name

    # De-duplicate the referenced ids and drop NULL foreign keys so the
    # IN (...) list only contains values that can actually match a row.
    wanted_ids = {obj[column] for obj in objects}
    wanted_ids.discard(None)

    related_by_id = {}
    if wanted_ids:
        rows = (
            fk_field.get_queryset()
            .filter(id__in=list(wanted_ids))
            .values(*fields)
        )
        related_by_id = {row['id']: row for row in rows}

    for obj in objects:
        obj[name] = related_by_id.get(obj[column])
def fetch_m2m(m2m_field, fields, objects):
    """Attach the related rows of a many-to-many field to every dict in ``objects``.

    Two queries are issued: one against the through table for all links
    whose source id is in ``objects``, and one against the related model
    for the rows those links point at. Each entry of ``objects`` gains a key
    named after the field (``m2m_field.field.name``) holding a list of
    related rows as dicts, in the order the through rows were returned.

    Args:
        m2m_field: object exposing ``field`` (with ``name``,
            ``related_model``, ``m2m_column_name()``,
            ``m2m_reverse_name()``) and ``through`` (presumably a Django
            many-to-many descriptor -- confirm against callers).
        fields: names passed to ``.values()`` on the related model; must
            contain ``'id'`` because rows are matched on that key.
        objects: sequence of dicts, each with an ``'id'`` key. Mutated in
            place.

    Returns:
        ``[]`` when ``objects`` is empty, otherwise ``None``; the result is
        written into ``objects``.
    """
    assert 'id' in fields
    if not objects:
        return []

    field = m2m_field.field
    source_column = field.m2m_column_name()
    target_column = field.m2m_reverse_name()

    object_ids = [obj['id'] for obj in objects]
    links = m2m_field.through.objects.all().filter(
        **{source_column + '__in': object_ids}
    ).values()

    # Single pass over the through rows: group target ids per source object
    # and collect the distinct target ids to fetch.
    targets_by_source = defaultdict(list)
    target_ids = set()
    for link in links:
        target_id = link[target_column]
        targets_by_source[link[source_column]].append(target_id)
        target_ids.add(target_id)

    related_by_id = {}
    if target_ids:
        related_rows = field.related_model.objects.filter(
            id__in=list(target_ids)
        ).values(*fields)
        related_by_id = {row['id']: row for row in related_rows}

    for obj in objects:
        # A link may point at a row the related model's manager did not
        # return; skip it instead of failing with KeyError.
        obj[field.name] = [
            related_by_id[target_id]
            for target_id in targets_by_source.get(obj['id'], ())
            if target_id in related_by_id
        ]
def fast_model_to_dict(instance, fields=None, exclude=None):
    """Return a dict of ``{field_name: getattr(instance, field_name)}``.

    Args:
        instance: object to read attributes from.
        fields: iterable of attribute names to include. When ``None`` the
            names come from ``get_field_names(instance)``.
        exclude: optional iterable of names to leave out. Names that are
            not in ``fields`` are ignored.

    Returns:
        A new dict. Neither ``fields`` nor ``exclude`` is modified.
    """
    # TODO: filefield
    if fields is None:
        fields = get_field_names(instance)
    if exclude:
        # Filter into a new list rather than calling fields.remove(): that
        # would mutate the caller's list, fail on tuples, and raise
        # ValueError for an excluded name that is not present.
        excluded = set(exclude)
        fields = [name for name in fields if name not in excluded]
    return {name: getattr(instance, name) for name in fields}
def get_field_names(obj):
    """Return the names of the non-relational fields in ``obj._meta.fields``.

    A field counts as relational when its ``remote_field`` attribute is
    truthy. ``Field.rel`` was renamed to ``remote_field`` in Django 1.9 and
    removed in 2.0, so ``rel`` is consulted only when ``remote_field`` is
    absent.
    """
    names = []
    for field in obj._meta.fields:
        if hasattr(field, 'remote_field'):
            relation = field.remote_field
        else:
            # Older Django versions only expose the legacy attribute.
            relation = getattr(field, 'rel', None)
        if not relation:
            names.append(field.name)
    return names
def queryset_iterator(queryset, chunksize=1000):
    """Yield the objects of a Django queryset in primary-key order, chunk by chunk.

    At most ``chunksize`` rows are fetched per query: each chunk is the
    next slice of rows whose pk is greater than the last pk yielded. After
    every chunk ``gc.collect()`` is called.

    Any ordering already on ``queryset`` is replaced by ordering on ``pk``,
    so ordered querysets are not supported.

    Iteration stops once the highest pk seen at start-up has been reached,
    or as soon as a chunk comes back empty (for example when the remaining
    rows were deleted while iterating).
    """
    try:
        last_pk = queryset.order_by('-pk')[0].pk
    except IndexError:
        # Empty queryset: nothing to yield.
        return

    queryset = queryset.order_by('pk')
    # Start without a lower bound instead of assuming pk > 0, so rows with
    # zero/negative keys are not skipped and non-integer keys also work.
    pk = None
    while pk is None or pk < last_pk:
        page = queryset if pk is None else queryset.filter(pk__gt=pk)
        fetched = False
        for obj in page[:chunksize]:
            fetched = True
            pk = obj.pk
            yield obj
        if not fetched:
            # No progress is possible; without this the loop never ends
            # when the row holding last_pk has disappeared.
            break
        gc.collect()
def chunked_queryset(queryset, chunksize=1000):
    """Yield the objects of ``queryset`` grouped into chunks of ``chunksize``.

    Objects are produced by ``queryset_iterator`` and grouped with
    ``more_itertools.chunked``, using the same ``chunksize`` for both.
    """
    objects = queryset_iterator(queryset, chunksize)
    batches = chunked(objects, chunksize)
    for batch in batches:
        yield batch
def dictfetchall(cursor):
    """Return every remaining row of ``cursor`` as a list of dicts.

    Each dict maps a column name (the first item of each entry in
    ``cursor.description``) to the value in the corresponding row position.
    """
    names = []
    for column in cursor.description:
        names.append(column[0])

    records = []
    for values in cursor.fetchall():
        records.append(dict(zip(names, values)))
    return records
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment