Skip to content

Instantly share code, notes, and snippets.

@senderista
Last active February 17, 2024 04:52
Show Gist options
  • Save senderista/f43b4e80ad11c79f7c24c02b487a97cf to your computer and use it in GitHub Desktop.
Save senderista/f43b4e80ad11c79f7c24c02b487a97cf to your computer and use it in GitHub Desktop.
simulates hashing into least loaded of 2 8-cell bins
#!/usr/bin/env python3
import numpy
class BinHasher8EntriesTwoChoices:
def __init__(self, size_exp: int) -> None:
# each bin is represented by a byte-size bitvector
self.size_exp: int = size_exp
self.array = numpy.zeros([2 ** (self.size_exp - 3)], dtype='uint8')
self.overflows: int = 0
self.bin_mask: numpy.uint64 = numpy.uint64(2 ** (self.size_exp - 3) - 1) << numpy.uint64(64 - self.size_exp)
"""
Returns True if insert was successful, False on overflow.
"""
def insert(self, num: int) -> bool:
# first calculate secondary bin
# randomly permute num using Murmur3 64-bit finalizer (inline for speed)
sec_num = num
sec_num ^= sec_num >> numpy.uint64(33)
sec_num *= 0xff51afd7ed558ccd
sec_num ^= sec_num >> numpy.uint64(33)
sec_num *= 0xc4ceb9fe1a85ec53
sec_num ^= sec_num >> numpy.uint64(33)
# find bin using leftmost {log2(bins)} bits of permuted value
pri_bin_index: int = numpy.uint64(num & self.bin_mask) >> numpy.uint64(64 - self.size_exp)
sec_bin_index: int = numpy.uint64(sec_num & self.bin_mask) >> numpy.uint64(64 - self.size_exp)
pri_bin_byte: numpy.uint8 = self.array[pri_bin_index]
sec_bin_byte: numpy.uint8 = self.array[sec_bin_index]
pri_bin_count = pri_bin_byte.bit_count()
sec_bin_count = sec_bin_byte.bit_count()
# if both bins are full then increment overflow counter and exit
if pri_bin_count == 8 and sec_bin_count == 8:
self.overflows += 1
return False
# prefer primary bin on ties
# set rightmost unset bit in least loaded bin
if pri_bin_count <= sec_bin_count:
pri_bin_byte = pri_bin_byte | (pri_bin_byte + 1)
self.array[pri_bin_index] = pri_bin_byte
else:
sec_bin_byte = sec_bin_byte | (sec_bin_byte + 1)
self.array[sec_bin_index] = sec_bin_byte
return True
if __name__ == "__main__":
for size_exp in range(8, 27):
print(f"Table size: {2 ** size_exp}")
table = BinHasher8EntriesTwoChoices(size_exp)
first_overflow = False
for i in range(1, (2 ** table.size_exp) + 1):
num: int = numpy.random.randint(1, high=numpy.iinfo(numpy.uint64).max, dtype=numpy.uint64)
if not table.insert(num) and not first_overflow:
first_overflow = True
print(f"Overflowed at {i} elements (fraction: {i / (2 ** table.size_exp)})")
print(f"Overflows for {2 ** table.size_exp} elements: {table.overflows} (fraction: {table.overflows / (2 ** table.size_exp)})")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment