Skip to content

Instantly share code, notes, and snippets.

@jdp
Created April 7, 2013 15:49
Show Gist options
  • Save jdp/5331012 to your computer and use it in GitHub Desktop.
Save jdp/5331012 to your computer and use it in GitHub Desktop.
example count-min sketch implementation
import random
class CountMinSketch(object):
def __init__(self, w, d, p):
self.w = w
self.d = d
self.p = p
self.C = [[0] * self.w for _ in range(self.d)]
self.a = [random.randint(1, self.p) for _ in range(self.d)]
self.b = [random.randint(1, self.p) for _ in range(self.d)]
self.N = 0
def hash(self, j, i):
return (self.a[j] * i + self.b[j] % self.p) % self.w
def update(self, i, c):
self.N += c
for j in range(self.d):
self.C[j][self.hash(j, i)] += c
def get(self, i):
e = self.p + 1
for j in range(self.d):
e = min(e, self.C[j][self.hash(j, i)])
return e
if __name__ == '__main__':
from collections import defaultdict
tests = [13, 69, 420]
sketch = CountMinSketch(2000, 10, 2 ** 31 - 1)
actual = defaultdict(int)
for i in range(100000):
event = random.randint(1, 1000)
if event in tests:
actual[event] += 1
sketch.update(event, 1)
for test in tests:
print test, "real", actual[test], "estimated", sketch.get(test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment