Created
February 17, 2024 02:53
-
-
Save senderista/a4930e5ae9057aa64416e714b601dc1e to your computer and use it in GitHub Desktop.
simulates hashing into 16-cell bins
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import numpy | |
| class BinHasher16EntriesOneChoice: | |
| def __init__(self, size_exp: int) -> None: | |
| # each bin is represented by a word-size bitvector | |
| self.size_exp: int = size_exp | |
| self.array = numpy.zeros([2 ** (self.size_exp - 4)], dtype='uint16') | |
| self.overflows: int = 0 | |
| self.bin_mask: numpy.uint64 = numpy.uint64(2 ** (self.size_exp - 4) - 1) << numpy.uint64(64 - self.size_exp) | |
| """ | |
| Returns True if insert was successful, False on overflow. | |
| """ | |
| def insert(self, num: int) -> bool: | |
| # find bin using leftmost {log2(bins)} bits of value | |
| bin_index: int = numpy.uint64(num & self.bin_mask) >> numpy.uint64(64 - self.size_exp) | |
| bin_word: numpy.uint16 = self.array[bin_index] | |
| # if bin is full then increment overflow counter and exit | |
| if bin_word == 0xffff: | |
| self.overflows += 1 | |
| return False | |
| # find rightmost unset bit in bin word and set it | |
| # (this cannot overflow since we already checked for the max word value) | |
| bin_word = bin_word | numpy.uint16(bin_word + 1) | |
| self.array[bin_index] = bin_word | |
| return True | |
| if __name__ == "__main__": | |
| for size_exp in range(8, 27): | |
| print(f"Table size: {2 ** size_exp}") | |
| table = BinHasher16EntriesOneChoice(size_exp) | |
| first_overflow = False | |
| for i in range(1, (2 ** table.size_exp) + 1): | |
| num: int = numpy.random.randint(1, high=numpy.iinfo(numpy.uint64).max, dtype=numpy.uint64) | |
| if not table.insert(num) and not first_overflow: | |
| first_overflow = True | |
| print(f"Overflowed at {i} elements (fraction: {i / (2 ** table.size_exp)})") | |
| print(f"Overflows for {2 ** table.size_exp} elements: {table.overflows} (fraction: {table.overflows / (2 ** table.size_exp)})") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment