Skip to content

Instantly share code, notes, and snippets.

@jdmaturen
Created April 2, 2011 20:40
Show Gist options
  • Save jdmaturen/899861 to your computer and use it in GitHub Desktop.
Save jdmaturen/899861 to your computer and use it in GitHub Desktop.
"""http://en.wikipedia.org/wiki/Reservoir_sampling
"""
import random
import struct
class RedisReservoirSample(object):
def __init__(self, redis, n, fmt='>i'):
"""
n is the sample size
fmt is struct format of the stored values, default 32-bit signed int
"""
self.redis = redis
self.n = n
self.fmt = fmt
self.fmt_size = struct.calcsize(fmt)
def append(self, name, val):
"""Append a val to the sample space
"""
count = self.redis.incr(name + '_count')
offset = None
if count <= self.n:
offset = (count-1) * self.fmt_size
else:
r = random.randrange(count)
if r < self.n:
offset = r * self.fmt_size
if offset is not None:
self.redis.setrange(name, offset, struct.pack(self.fmt, val))
def read(self, name):
"""returns a list of unpacked values
"""
val, count = self.redis.pipeline().get(name).get(name + '_count').execute()
count = int(count)
if count > self.n:
count = self.n
def chunk(seq, size, count):
for i in xrange(count):
yield seq[i*size:(i+1)*size]
def unpack(fmt, val):
x = struct.unpack(self.fmt, val)
if len(x) == 1:
return x[0]
return x
return [unpack(self.fmt, x) for x in chunk(val, self.fmt_size, count)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment