Created
September 28, 2017 16:06
-
-
Save dannycroft/49f16675891277c98a4f26c0c55bd387 to your computer and use it in GitHub Desktop.
A basic bloom filter implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| ''' A basic bloom filter implementation''' | |
| from bitarray import bitarray | |
| from hashlib import sha256 | |
| import math | |
| def h_i(i, m, s): | |
| ''' | |
| Our "uniformly distributed" hash function: h_i(s) = sha256(s + i) % m | |
| i - index of hash function | |
| m - length of bit vector | |
| s - an element of set S (string) | |
| ''' | |
| return int(sha256(bytes([i]) + s).hexdigest(), 16) % m | |
| def optimal_km(n, p): | |
| ''' | |
| Given set size n, and false positive probability p what are the optimal | |
| number of hash functions k and length of bit vector m ? | |
| ''' | |
| ln2 = math.log(2) | |
| lnp = math.log(p) | |
| k = -lnp/ln2 | |
| m = -n*lnp/((ln2)**2) | |
| return int(math.ceil(k)), int(math.ceil(m)) | |
| def exact_pfp(n, m, k): | |
| ''' | |
| Calculate the exact false positive probability. | |
| n - number of items in the set | |
| m - length of bit bector | |
| k - number of hash functions | |
| ''' | |
| return ( 1 - (1 - (1/m))**(k*n) )**k | |
| class bloom_filter: | |
| def __init__(self, m, k, h=h_i): | |
| self.m = m # length of bit array | |
| self.k = k # number of hash functions | |
| self.h = h # hash function | |
| self.bits = bitarray(self.m) # the actual bit store | |
| def add(self, s): | |
| for i in range(self.k): | |
| self.bits[self.h(i, self.m, s)] = 1 | |
| def contains(self, s): | |
| for i in range(self.k): | |
| if self.bits[self.h(i, self.m, s)] == 0: | |
| return False | |
| return True # all bits were set |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment