Skip to content

Instantly share code, notes, and snippets.

@senderista
Created February 17, 2024 03:03
Show Gist options
  • Save senderista/a2ed66164c0865d32a919629beb6bc02 to your computer and use it in GitHub Desktop.
Save senderista/a2ed66164c0865d32a919629beb6bc02 to your computer and use it in GitHub Desktop.
simulates hashing into least loaded of 2 32-cell bins
#!/usr/bin/env python3
import numpy
class BinHasher32EntriesTwoChoices:
def __init__(self, size_exp: int) -> None:
# each bin is represented by a word-size bitvector
self.size_exp: int = size_exp
self.array = numpy.zeros([2 ** (self.size_exp - 5)], dtype='uint32')
self.overflows: int = 0
self.bin_mask: numpy.uint64 = numpy.uint64(2 ** (self.size_exp - 5) - 1) << numpy.uint64(64 - self.size_exp)
"""
Returns True if insert was successful, False on overflow.
"""
def insert(self, num: int) -> bool:
# first calculate secondary bin
# randomly permute num using Murmur3 64-bit finalizer (inline for speed)
sec_num = num
sec_num ^= sec_num >> numpy.uint64(33)
sec_num *= 0xff51afd7ed558ccd
sec_num ^= sec_num >> numpy.uint64(33)
sec_num *= 0xc4ceb9fe1a85ec53
sec_num ^= sec_num >> numpy.uint64(33)
# find bin using leftmost {log2(bins)} bits of permuted value
pri_bin_index: int = numpy.uint64(num & self.bin_mask) >> numpy.uint64(64 - self.size_exp)
sec_bin_index: int = numpy.uint64(sec_num & self.bin_mask) >> numpy.uint64(64 - self.size_exp)
pri_bin_word: numpy.uint32 = self.array[pri_bin_index]
sec_bin_word: numpy.uint32 = self.array[sec_bin_index]
pri_bin_count = pri_bin_word.bit_count()
sec_bin_count = sec_bin_word.bit_count()
# if both bins are full then increment overflow counter and exit
if pri_bin_count == 32 and sec_bin_count == 32:
self.overflows += 1
return False
# prefer primary bin on ties
# set rightmost unset bit in least loaded bin
if pri_bin_count <= sec_bin_count:
pri_bin_word = pri_bin_word | (pri_bin_word + 1)
self.array[pri_bin_index] = pri_bin_word
else:
sec_bin_word = sec_bin_word | (sec_bin_word + 1)
self.array[sec_bin_index] = sec_bin_word
return True
if __name__ == "__main__":
for size_exp in range(8, 27):
print(f"Table size: {2 ** size_exp}")
table = BinHasher32EntriesTwoChoices(size_exp)
first_overflow = False
for i in range(1, (2 ** table.size_exp) + 1):
num: int = numpy.random.randint(1, high=numpy.iinfo(numpy.uint64).max, dtype=numpy.uint64)
if not table.insert(num) and not first_overflow:
first_overflow = True
print(f"Overflowed at {i} elements (fraction: {i / (2 ** table.size_exp)})")
print(f"Overflows for {2 ** table.size_exp} elements: {table.overflows} (fraction: {table.overflows / (2 ** table.size_exp)})")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment