Last active
February 9, 2018 09:45
-
-
Save baxeico/4178f196da2b9ef6d856 to your computer and use it in GitHub Desktop.
queryset_iterator, a function to iterate over huge Django querysets without using too much memory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gc
from itertools import islice
def queryset_iterator(qs, batchsize=500, gc_collect=True):
    """Yield the objects of a queryset in primary-key order, batch by batch.

    The distinct primary keys of ``qs`` are streamed first; the objects
    themselves are then loaded ``batchsize`` at a time with a
    ``pk__in`` filter, so only one batch of objects is fetched per query.

    Args:
        qs: Queryset-like object supporting ``values_list``, ``order_by``,
            ``distinct``, ``filter(pk__in=...)`` and ``iterator``.
        batchsize: Maximum number of objects loaded per query; must be a
            positive integer.
        gc_collect: When true, run ``gc.collect()`` after each batch has
            been yielded.

    Yields:
        The objects of ``qs``, ordered by primary key.

    Raises:
        ValueError: If ``batchsize`` is smaller than 1. Because this is a
            generator, the error surfaces on the first iteration step.
    """
    if batchsize < 1:
        # A non-positive size could never fill a batch and would loop forever.
        raise ValueError("batchsize must be a positive integer")

    pk_iterator = iter(
        qs.values_list('pk', flat=True).order_by('pk').distinct().iterator()
    )
    while True:
        # islice consumes at most `batchsize` keys and works on Python 2
        # and 3 alike (unlike the Python-2-only ``iterator.next()``).
        primary_key_buffer = list(islice(pk_iterator, batchsize))
        if not primary_key_buffer:
            # Key stream exhausted exactly on a batch boundary (or the
            # queryset is empty): nothing left to fetch.
            return

        batch = qs.filter(pk__in=primary_key_buffer).order_by('pk')
        for obj in batch.iterator():
            yield obj

        if gc_collect:
            gc.collect()

        if len(primary_key_buffer) < batchsize:
            # A short batch means the key stream is exhausted.
            return
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment