Created
February 17, 2024 03:02
-
-
Save senderista/dfa3afda6ea59e75ff791bb2546f958c to your computer and use it in GitHub Desktop.
simulates hashing into 32-cell bins
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import numpy | |
class BinHasher32EntriesOneChoice: | |
def __init__(self, size_exp: int) -> None: | |
# each bin is represented by a word-size bitvector | |
self.size_exp: int = size_exp | |
self.array = numpy.zeros([2 ** (self.size_exp - 5)], dtype='uint32') | |
self.overflows: int = 0 | |
self.bin_mask: numpy.uint64 = numpy.uint64(2 ** (self.size_exp - 5) - 1) << numpy.uint64(64 - self.size_exp) | |
""" | |
Returns True if insert was successful, False on overflow. | |
""" | |
def insert(self, num: int) -> bool: | |
# find bin using leftmost {log2(bins)} bits of value | |
bin_index: int = numpy.uint64(num & self.bin_mask) >> numpy.uint64(64 - self.size_exp) | |
bin_word: numpy.uint32 = self.array[bin_index] | |
# if bin is full then increment overflow counter and exit | |
if bin_word == 0xffffffff: | |
self.overflows += 1 | |
return False | |
# find rightmost unset bit in bin word and set it | |
# (this cannot overflow since we already checked for the max word value) | |
bin_word = bin_word | numpy.uint32(bin_word + 1) | |
self.array[bin_index] = bin_word | |
return True | |
if __name__ == "__main__": | |
for size_exp in range(8, 27): | |
print(f"Table size: {2 ** size_exp}") | |
table = BinHasher32EntriesOneChoice(size_exp) | |
first_overflow = False | |
for i in range(1, (2 ** table.size_exp) + 1): | |
num: int = numpy.random.randint(1, high=numpy.iinfo(numpy.uint64).max, dtype=numpy.uint64) | |
if not table.insert(num) and not first_overflow: | |
first_overflow = True | |
print(f"Overflowed at {i} elements (fraction: {i / (2 ** table.size_exp)})") | |
print(f"Overflows for {2 ** table.size_exp} elements: {table.overflows} (fraction: {table.overflows / (2 ** table.size_exp)})") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment