Last active
December 2, 2016 21:11
-
-
Save markostam/9150e6c27a758579ba07903bc0e96ecb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bitarray import bitarray | |
class BloomFilter: | |
def __init__(self): | |
# use bitarray instead of list to save space | |
self.ba = bitarray(10**8) | |
def _hash_fxn1(self, x): | |
if type(x) == str: | |
b = format(ord(x), 'b') | |
else: | |
b = format(x, 'b') | |
y = (i[1] for i in enumerate(b) if i[0]%2 == 0) | |
y = int(''.join(y),2) | |
return y%11 | |
def _hash_fxn2(self, x): | |
if type(x) == str: | |
b = (ord(x), 'b') | |
else: | |
b = format(x, 'b') | |
y = (i[1] for i in enumerate(b) if i[0]%2 != 0) | |
y = int(''.join(y),2) | |
return y%11 | |
def _increment_ba(self, x): | |
self.ba[self._hash_fxn1(x)] = 1 | |
self.ba[self._hash_fxn2(x)] = 1 | |
def write(self, x): | |
for i in x: | |
self._increment_ba(i) | |
def check(self, x): | |
str_len = len(str(x)) | |
total = 0 | |
for i in x: | |
total += int(self.ba[self._hash_fxn2(i)]) + int(self.ba[self._hash_fxn1(i)]) | |
if total/str_len/2 == 1: | |
print("Item is in the bloom filter") | |
else: | |
print("Item is not in the bloom filter") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment