Created
April 7, 2013 15:49
-
-
Save jdp/5331012 to your computer and use it in GitHub Desktop.
example count-min sketch implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
class CountMinSketch(object): | |
def __init__(self, w, d, p): | |
self.w = w | |
self.d = d | |
self.p = p | |
self.C = [[0] * self.w for _ in range(self.d)] | |
self.a = [random.randint(1, self.p) for _ in range(self.d)] | |
self.b = [random.randint(1, self.p) for _ in range(self.d)] | |
self.N = 0 | |
def hash(self, j, i): | |
return (self.a[j] * i + self.b[j] % self.p) % self.w | |
def update(self, i, c): | |
self.N += c | |
for j in range(self.d): | |
self.C[j][self.hash(j, i)] += c | |
def get(self, i): | |
e = self.p + 1 | |
for j in range(self.d): | |
e = min(e, self.C[j][self.hash(j, i)]) | |
return e | |
if __name__ == '__main__': | |
from collections import defaultdict | |
tests = [13, 69, 420] | |
sketch = CountMinSketch(2000, 10, 2 ** 31 - 1) | |
actual = defaultdict(int) | |
for i in range(100000): | |
event = random.randint(1, 1000) | |
if event in tests: | |
actual[event] += 1 | |
sketch.update(event, 1) | |
for test in tests: | |
print test, "real", actual[test], "estimated", sketch.get(test) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment