Skip to content

Instantly share code, notes, and snippets.

@mesejo
Created May 13, 2021 20:58
Show Gist options
  • Save mesejo/58595857304de26ff0f5c0f929a38d07 to your computer and use it in GitHub Desktop.
Save mesejo/58595857304de26ff0f5c0f929a38d07 to your computer and use it in GitHub Desktop.
Implementation of the L algorithm for reservoir sampling
import random
import numpy as np
from numpy.random import default_rng
from itertools import islice
def reservoir_sampling(pop, k):
reservoir = []
stream = iter(pop)
for e in islice(stream, k):
reservoir.append(e)
rng = default_rng()
w = np.exp(np.log(rng.random()) / k)
nxt = (k - 1) + rng.geometric(w)
for i, e in enumerate(stream, k):
if i == nxt:
reservoir[rng.integers(k)] = e
w *= np.exp(np.log(rng.random()) / k)
nxt += rng.geometric(w)
return reservoir
def reservoir_sampling_with_replacement(pop, k):
stream = iter(pop)
e = next(stream)
reservoir = np.repeat(e, k)
rng = default_rng()
w = rng.random(k)
nxt = rng.geometric(w)
min_nxt = np.amin(nxt)
for i, e in enumerate(stream, 1):
if i == min_nxt:
mask = (i == nxt)
reservoir[mask] = e
w[mask] = w[mask] * rng.random(np.count_nonzero(mask))
nxt[mask] = nxt[mask] + rng.geometric(w[mask])
min_nxt = np.amin(nxt)
return reservoir.tolist()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment