Last active
August 12, 2021 21:56
-
-
Save mx-moth/4e283fea7143e64852dcc09b14a183f7 to your computer and use it in GitHub Desktop.
Split a generator up into chunks of size `n`, without walking the list or keeping items in memory needlessly
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import deque
from itertools import chain, islice
class splitter:
    """Helper class for splitat.

    Wraps a single shared iterator and hands out two generators over it:
    ``prefix()`` yields up to ``count`` leading items and ``suffix()`` yields
    everything after them. The two generators may be consumed in any order.
    If the suffix is started while the prefix still has items owing, those
    items are pulled off the iterator into ``self.queue`` so the prefix can
    yield them later.
    """

    def __init__(self, iterable, count):
        """Store the iterator for *iterable* and the prefix length *count*."""
        self.iterator = iter(iterable)
        # Number of items the prefix may still take from the iterator.
        self.count = count
        # Items set aside for the prefix by suffix(). A deque is used so the
        # prefix can take from the front in O(1); list.pop(0) would shift
        # every remaining element on each pop.
        self.queue = deque()

    def split(self):
        """Return the ``(prefix, suffix)`` pair of generators."""
        return self.prefix(), self.suffix()

    def prefix(self):
        """Yield the leading items, from the queue or from the iterator."""
        while self.count or self.queue:
            # If there are items in the queue, then the suffix has been
            # accessed before the prefix was consumed. Yield the remaining
            # items from the queue and then clean up.
            if self.queue:
                queue = self.queue
                # Consume the queue from the front, dropping each item from
                # the queue as it is handed out so it can be garbage
                # collected as soon as the consumer is done with it.
                while queue:
                    yield queue.popleft()
                del queue
                del self.queue
                # Finish up, the prefix has been exhausted.
                return

            # If the queue is empty, consume from the iterator as normal.
            # The count is decremented *before* yielding so that a suffix
            # started while this generator is suspended sees the right
            # number of items still owed to the prefix.
            try:
                self.count -= 1
                yield next(self.iterator)
            except StopIteration:
                # The underlying iterator ran out before count items.
                return

    def suffix(self):
        """Yield every item that follows the prefix."""
        # If the whole prefix has not been consumed yet, queue up the
        # remaining items for the prefix to yield later.
        if self.count:
            self.queue = deque(islice(self.iterator, self.count))

        # Yield the remaining items from the iterator. Can't just return the
        # iterator here, as that would eagerly consume the prefix.
        yield from self.iterator
def splitat(iterable, count):
    """
    Divide an iterable into a ``(prefix, suffix)`` pair of generators.

    The prefix produces the first ``count`` items and the suffix produces
    whatever comes after them. When the iterable holds fewer than ``count``
    items the prefix is simply shorter and the suffix is empty.
    """
    helper = splitter(iterable, count)
    return helper.split()
def gen_one(item):
    """
    Return a generator that produces ``item`` exactly once and then stops.
    """
    # A generator (rather than a one-element container) is used on purpose.
    # NOTE(review): presumably so the reference to ``item`` is dropped as
    # soon as the generator finishes - confirm before replacing.
    yield item
def isempty(iterable):
    """
    Checks if an iterable is empty by peeking at its next element. Returns
    a tuple of (empty, iterator). The returned iterator should be used instead
    of the object that was passed in, as an iterator passed in is now missing
    its first item.

    Any iterable is accepted: it is passed through ``iter()`` first, which
    leaves an existing iterator unchanged and lets plain containers such as
    lists be checked without raising ``TypeError``.
    """
    iterator = iter(iterable)
    try:
        first = next(iterator)
    except StopIteration:
        # Nothing to put back; hand the exhausted iterator straight back.
        return True, iterator
    # Put the peeked item back in front of the rest. It is wrapped with
    # gen_one() rather than stored in a container so that it is not kept
    # alive by the chain once it has been consumed.
    return False, chain(gen_one(first), iterator)
def chunksof(iterable, n):
    """
    Splits an iterable into chunks of length n. If there is not round number of
    n chunks in iterable, the last chunk will be shorter than n.

    Each chunk is itself a lazy generator produced by ``splitat``; chunks may
    be consumed in any order.

    Raises ``ValueError`` if ``n`` is less than one. Because this is a
    generator function, the error surfaces when the first chunk is requested.
    A chunk size of zero would otherwise yield empty chunks forever on any
    non-empty input.
    """
    if n < 1:
        raise ValueError("n must be at least one")

    iterator = iter(iterable)
    # NOTE(review): every round wraps the remaining iterator in another
    # chain + suffix generator, so the delegation depth grows with the number
    # of chunks produced - confirm this is acceptable for very long inputs.
    while True:
        # Quit if the iterator is empty
        empty, iterator = isempty(iterator)
        if empty:
            return

        # Split the iterator and yield a chunk
        chunk, iterator = splitat(iterator, n)
        yield chunk
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from chunks import chunksof, splitat | |
class VerboseInt:
    """Value wrapper that prints when it is created and when it is deleted.

    Used to make object lifetimes visible while exercising the chunking
    helpers. Instances compare equal to, and hash like, the wrapped value.
    """

    def __init__(self, i):
        print("Just made", i)
        self.i = i

    def __str__(self):
        return str(self.i)

    def __repr__(self):
        return repr(self.i)

    def __eq__(self, other):
        # Delegates to the wrapped value, so ``VerboseInt(3) == 3`` is true.
        return self.i == other

    def __hash__(self):
        # Defining __eq__ alone makes a class unhashable; hash like the
        # wrapped value so equal objects have equal hashes.
        return hash(self.i)

    def __del__(self):
        print("Deleting", self.i)
def verbose_range(*args):
    """Yield a VerboseInt for each number in ``range(*args)``, lazily."""
    numbers = range(*args)
    for number in numbers:
        yield VerboseInt(number)
def test():
    """Exercise chunksof() and splitat() step by step, printing progress.

    Chunks are deliberately consumed out of order so the creation and
    deletion messages printed by VerboseInt show when items are produced and
    released.
    """

    def expect_exhausted(iterator):
        # One more next() on a finished generator must raise StopIteration.
        try:
            out = next(iterator)
        except StopIteration:
            return
        assert False, f"Should have raised StopIteration, got {out} instead"

    print("Making the chunks")
    chunk_stream = chunksof(verbose_range(11), 3)

    print("Getting the first chunk")
    first = next(chunk_stream)

    print("Testing first chunk item by item")
    for expected in range(3):
        assert next(first) == expected
    expect_exhausted(first)

    print("Getting second chunk")
    second = next(chunk_stream)
    print("Getting third chunk")
    third = next(chunk_stream)

    print("Testing second and third chunk, item by item")
    # Interleave the two chunks on purpose: the third chunk is read before
    # the second one has been touched.
    assert next(third) == 6
    assert next(second) == 3
    assert next(third) == 7
    assert next(second) == 4
    assert next(second) == 5
    assert next(third) == 8

    print("Done! What is left?")
    print(list(chunk_stream))
    chunk_stream.close()
    del chunk_stream

    head, tail = splitat(verbose_range(10), 5)

    print("Testing prefix")
    for expected in range(2):
        assert next(head) == expected

    print("Testing suffix")
    for expected in (5, 6):
        assert next(tail) == expected

    print("Testing prefix again")
    for expected in (2, 3, 4):
        assert next(head) == expected
    expect_exhausted(head)

    assert next(tail) == 7
# Run the walkthrough only when this file is executed as a script.
if __name__ == "__main__":
    test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment