Last active
July 20, 2019 11:47
-
-
Save louisswarren/34bd94aefcb4f22d0fdddeee5b0a34af to your computer and use it in GitHub Desktop.
Min heap using a tape of booleans
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Idea suggested by Michael Cowie | |
import itertools | |
class BloomHeap: | |
def __init__(self, *values): | |
self.tape = [] | |
if len(values) == 1 and not isinstance(values[0], int): | |
self += values[0] | |
elif values: | |
try: | |
self.tape = [False for _ in range(max(values) + 1)] | |
for value in values: | |
self.tape[value] = True | |
except TypeError: | |
raise TypeError("BloomHeap values must be natural numbers") | |
def __add__(self, other): | |
return BloomHeap(*self, *other) | |
def __contains__(self, x): | |
return self.tape[x] if isinstance(x, int) and x >= 0 else False | |
def __eq__(self, other): | |
if not isinstance(other, BloomHeap): | |
return False | |
for x, y in itertools.zip_longest(self, other): | |
# BloomHeap values will never be None, but the fillvalue is None | |
if x != y: | |
return False | |
return True | |
def __getitem__(self, n): | |
for x in self: | |
if n == 0: | |
return x | |
n -= 1 | |
raise IndexError("BloomHeap index out of range") | |
def __iadd__(self, other): | |
for x in other: | |
self.add(x) | |
def __iter__(self): | |
for i, v in enumerate(self.tape): | |
if v: | |
yield i | |
def __len__(self): | |
i = 0 | |
for _ in self: | |
i += 1 | |
return i | |
def __repr__(self): | |
return "BloomHeap({})".format(', '.join(map(repr, self))) | |
def add(self, x): | |
try: | |
if x >= len(self.tape): | |
self.tape += [False for _ in range(x + 1 - len(self.tape))] | |
self.tape[x] = True | |
except TypeError: | |
raise TypeError("BloomHeap values must be natural numbers") | |
def copy(self): | |
"""Return a shallow copy of the BloomHeap. | |
You may want to do this if you have deleted the previous max element.""" | |
return BloomHeap(*self) | |
def pop(self, n=0): | |
for i, v in enumerate(self.tape): | |
if v: | |
if n == 0: | |
self.tape[i] = False | |
return i | |
else: | |
n -= 1 | |
raise IndexError("pop index out of range") | |
def remove(self, x): | |
if isinstance(x, int) and x >= 0: | |
self.tape[x] = False | |
x = BloomHeap(1, 5) | |
y = BloomHeap([3, 10]) | |
print(x) | |
print(y) | |
assert(5 in x) | |
assert("foo" not in x) | |
x.add(7) | |
print(x) | |
x.remove(7) | |
print(x) | |
x = x + y | |
print(x) | |
assert(x == BloomHeap(10, 3, 1, 5)) | |
assert(x != BloomHeap(3, 1, 5)) | |
assert(x[1] == 3) | |
try: | |
x[4] | |
assert(False) | |
except IndexError: | |
pass | |
print(len(x)) | |
print(bool(x)) | |
print(x.pop(1)) | |
print(x) | |
# Benchmark | |
from time import time | |
from random import sample | |
# BloomHeap tape is half-populated | |
x = sample(range(1000000), 500000) | |
lt = time() | |
ls = sorted(x) | |
ltt = time() | |
print("List time:", ltt - lt) | |
bt = time() | |
bs = iter(BloomHeap(x)) | |
btt = time() | |
print("Bloom time:", btt - bt) | |
""" | |
BloomHeap(1, 5) | |
BloomHeap(3, 10) | |
BloomHeap(1, 5, 7) | |
BloomHeap(1, 5) | |
BloomHeap(1, 3, 5, 10) | |
4 | |
True | |
3 | |
BloomHeap(1, 5, 10) | |
List time: 0.15860724449157715 | |
Bloom time: 0.27932047843933105 | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment