Skip to content

Instantly share code, notes, and snippets.

@alexgarel
Last active April 11, 2022 14:45
Show Gist options
  • Save alexgarel/9de2f548414ddeea1ed5279800d98ddd to your computer and use it in GitHub Desktop.
Save alexgarel/9de2f548414ddeea1ed5279800d98ddd to your computer and use it in GitHub Desktop.
Validating streamed random choice
import collections
import random
import statistics
def run_exp(k, N):
    """Run one experiment: stream-select k items among N.

    The items 0..N-1 are consumed one at a time. Each incoming item draws
    a uniform random position among all the items seen so far (itself
    included) and is kept only when that position falls inside the first
    k slots; the selection is then trimmed back to at most k entries.

    Args:
        k: maximum number of items to keep.
        N: number of items in the stream.

    Returns:
        A list of at most k distinct integers taken from range(N).
    """
    result = []
    for seen, item in enumerate(range(N), start=1):
        index = random.randrange(seen)
        # The bound must be k, not N: index < N is always true (seen <= N),
        # which made every item get inserted and then sliced away again.
        # An index >= k would only land in the part that gets trimmed, so
        # skipping it yields the same selection without the wasted work.
        if index < k:
            result.insert(index, item)
            # Drop the entry pushed past the k-th slot, if any.
            del result[k:]
    return result
def run_exps(k, N):
    """Run N * 100 experiments of selecting k items among N.

    Tallies how many experiments selected each item, then prints the
    expected per-item count (k * 100, valid when k <= N) next to the
    observed mean and sample standard deviation of those counts.

    Args:
        k: number of items selected by each experiment.
        N: number of items in the stream; also sets the number of
            experiments (N * 100).

    Returns:
        A collections.Counter mapping each selected item to the number
        of experiments in which it was selected.

    Raises:
        statistics.StatisticsError: if N < 2, because the mean needs at
            least one data point and the standard deviation at least two.
    """
    num = N * 100
    c = collections.Counter()
    for _ in range(num):
        c.update(run_exp(k, N))
    # Take the count of every item in range(N), not just the Counter's
    # keys: an item that was never selected has no key, and leaving its
    # zero out would bias both the mean and the standard deviation.
    counts = [c[item] for item in range(N)]
    print(
        "Expected value:", k * 100,
        "Mean:", statistics.mean(counts),
        "Std dev:", statistics.stdev(counts),
    )
    return c
# Sample run: select 10 items among 200, repeated 200 * 100 times.
data = run_exps(k=10, N=200)
# Output recorded from three separate runs:
# Expected value: 1000 Mean: 1000 Std dev: 29.911595035837518
# Expected value: 1000 Mean: 1000 Std dev: 30.925912440888588
# Expected value: 1000 Mean: 1000 Std dev: 28.366765181003117
# Larger experiment, left disabled because it takes around 5 min on the
# author's CPU:
# data = run_exps(20, 2000)
# Expected value: 2000 Mean: 2000 Std dev: 43.85401449461209
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment