Created
June 15, 2018 11:10
-
-
Save jaklinger/51a02ebc72b79371185e3ce9f09e771e to your computer and use it in GitHub Desktop.
Randomly chunk up an iterable, useful for sampling efficiently
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random
def chunks(whole, n_chunks):
    '''Randomly split a sized collection into ``n_chunks`` shuffled chunks.

    The elements of ``whole`` are shuffled (on a private copy, so the
    caller's object is never modified) and then yielded as consecutive
    slices of the shuffled copy.

    Args:
        whole: Any iterable that supports ``len()`` (list, tuple, range,
            string, ...).
        n_chunks: Number of chunks to produce; must be an integer.

    Yields:
        Lists of elements. Every chunk holds ``len(whole) // n_chunks``
        items, except the final chunk, which also receives the remainder.
        If ``n_chunks`` is not in ``1..len(whole)``, the original
        ``whole`` object is yielded once, unshuffled and uncopied.
    '''
    whole_size = len(whole)

    # Chunking only makes sense for 1 <= n_chunks <= len(whole); otherwise
    # hand the input back untouched as the single "chunk".
    if n_chunks > whole_size or n_chunks <= 0:
        yield whole
        return

    # list() rather than .copy(): works for any sized iterable and keeps
    # the caller's object out of the in-place shuffle.
    shuffled = list(whole)
    random.shuffle(shuffled)

    # Exact integer arithmetic (no float round-trip) for the chunk sizes.
    chunk_size, remainder = divmod(whole_size, n_chunks)

    last_index = n_chunks - 1
    for index in range(n_chunks):
        start = index * chunk_size
        end = start + chunk_size
        # The final chunk absorbs whatever did not divide evenly.
        if index == last_index:
            end += remainder
        yield shuffled[start:end]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment