Created
December 7, 2011 23:02
-
-
Save acdha/1445182 to your computer and use it in GitHub Desktop.
Python iterator using arbitrary slices: handy for processing objects like django-haystack SearchQuerySets which trigger backend I/O on slicing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import count | |
def chunked_iterator(iterable, chunk_size):
    """Yield the items of a slice-able object, fetching chunk_size at a time.

    Items are produced one by one, but the underlying object is only ever
    accessed through slices of ``chunk_size`` items, so we can e.g. retrieve
    records from Solr in batches of 50-100 rather than the default 10.

    Note that unlike the itertools grouper example we must use slice notation
    to trigger things like Haystack's sliced ``__getitem__`` to set our own
    batch size.

    :param iterable: any object supporting ``obj[start:stop]`` whose slices
        can be iterated.
    :param chunk_size: number of items requested per slice; must be >= 1.
    :raises ValueError: if ``chunk_size`` is less than 1. Because this is a
        generator, the error surfaces when iteration starts rather than at
        call time.
    """
    # A zero or negative chunk size can never satisfy the "short chunk"
    # termination test below, which would make the loop spin forever.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    start = 0
    while True:
        chunk = iterable[start:start + chunk_size]

        # Count what we actually yield instead of calling len(chunk): this
        # also works for slice results that do not implement __len__.
        yielded = 0
        for item in chunk:
            yield item
            yielded += 1

        if yielded < chunk_size:
            break  # We're at the last page of results

        start += chunk_size
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def test_chunked_iterator(self):
    """Check that chunked_iterator yields exactly what plain iteration would.

    Somewhat pedantic, but it guards the chunk-boundary cases: a partial
    final chunk, an exact multiple of the chunk size, and an input shorter
    than a single chunk.
    """
    scenarios = (
        # Final chunk is only partially filled:
        (range(0, 10), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        # Even multiple of chunk size:
        (range(0, 9), [0, 1, 2, 3, 4, 5, 6, 7, 8]),
        # Less than one chunk:
        (range(0, 1), [0]),
    )

    for source, expected in scenarios:
        self.assertEqual(list(chunked_iterator(source, 3)), expected)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment