Last active
March 25, 2022 06:00
-
-
Save abersheeran/210f5c1a6f36721302f755e39a242e50 to your computer and use it in GitHub Desktop.
Bloom filter by python3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from hashlib import blake2b | |
class BloomFilter: | |
def __init__(self, elements_count: int, *, error_rate: float = 0.0001) -> None: | |
byte_count = 1 + int( | |
-(math.log(error_rate) / math.log(2) ** 2) * elements_count // 8 | |
) | |
digest_size = int(math.log2(elements_count)) + (elements_count % 2) | |
self.bucket = bytearray(byte_count) | |
self.hash_functions = [ | |
(lambda salt: (lambda x: ( | |
int.from_bytes( | |
blake2b(x, digest_size=digest_size, salt=salt).digest(), "big", | |
) | |
% (byte_count * 8) | |
)))(salt) | |
for salt in [ | |
i.to_bytes(1, "big") | |
for i in range(int(-math.log(error_rate) // math.log(2)) + 1) | |
] | |
] | |
def add(self, value: bytes) -> None: | |
hashs = [hash_fn(value) for hash_fn in self.hash_functions] | |
for num in hashs: | |
self.bucket[num // 8] |= 2 ** (num % 8) | |
def exists(self, value: bytes) -> bool: | |
hashs = [hash_fn(value) for hash_fn in self.hash_functions] | |
for num in hashs: | |
if not self.bucket[num // 8] & (2 ** (num % 8)): | |
return False | |
return True |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment