Created
October 23, 2011 19:22
-
-
Save andres-erbsen/1307752 to your computer and use it in GitHub Desktop.
Shuffle for python iterators. This works by holding `bufsize` items back and yielding them sometime later. This is NOT 100% random, proved or anything.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import random | |
def itershuffle(iterable,bufsize=1000): | |
"""Shuffle an iterator. This works by holding `bufsize` items back | |
and yielding them sometime later. This is NOT 100% random, proved or anything.""" | |
iterable = iter(iterable) | |
buf = [] | |
try: | |
while True: | |
for i in xrange(random.randint(1, bufsize-len(buf))): | |
buf.append(iterable.next()) | |
random.shuffle(buf) | |
for i in xrange(random.randint(1, bufsize)): | |
if buf: | |
yield buf.pop() | |
else: | |
break | |
except StopIteration: | |
random.shuffle(buf) | |
while buf: | |
yield buf.pop() | |
raise StopIteration | |
if __name__ == '__main__': | |
# demonstration | |
print ' '.join(iter('abcdefghijklmnopqrstuv'.upper())) | |
for i in range(10): | |
print ' '.join(itershuffle('abcdefghijklmnopqrstuv'.upper())) | |
# no item is lost in process | |
for i in range(1000): | |
assert set(itershuffle(range(100))) == set(range(100)) | |
# memory usage test. if it was not iterative, we would run out of memory | |
#~ for i in itershuffle(xrange(10000000),10): | |
#~ pass |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
def itershuffle(iterable):
iterable = iter(iterable)
buf = []
try:
while len(buf) < 100:
buf.append(next(iterable))
while True:
i = (random.random() * 100) // 1
yield buf[i]
try:
buf[i] = next(iterable)
except StopIteration:
buf.pop(i)
raise StopIteration
except StopIteration:
random.shuffle(buf)
while buf:
yield buf.pop()
raise StopIteration
Inspired by you so I thought I'd share. For python3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Using a heap would probably be faster and easier to reason about.