Skip to content

Instantly share code, notes, and snippets.

@echasnovski
Created April 25, 2020 10:58
Show Gist options
  • Save echasnovski/2008041c73fe07944de6f841db80b71e to your computer and use it in GitHub Desktop.
Save echasnovski/2008041c73fe07944de6f841db80b71e to your computer and use it in GitHub Desktop.
Simple class for doing "cycled sampling": iterative sampling when currently sampled element is put in the beginning of sampling pool.
import random
# Used only for exploration
import numpy as np
class CycleSample:
def __init__(self, x, weights=None, preshuffle=True):
self.pool = list(x)
if preshuffle:
random.shuffle(self.pool)
self._n = len(x)
self._inds = list(range(self._n))
# For future, it can be useful for `weights` to be a function that is
# applied to current pool before each draw. For example, this enables
# usage "Don't sample recent `k` items and sample rest with probability
# proportional to items' lengths". This will introduce significant
# overhead, though.
self.weights = weights
def _pop(self, index):
"""Return item at index and place it at the beginning"""
res = self.pool.pop(index)
self.pool.insert(0, res)
return res
def draw(self, size=1):
"""Draw sample
Draw sample based on the following algorithm:
- Select position in the current pool, at which element will be drawn.
- Pop this elment from the pool: extract it, put at the beginning of
the pool, and return it.
- Repeat.
Parameters
----------
size : int, optional
Size of sample to draw, by default 1
Returns
-------
draw : list
List of drawn elements
"""
inds = random.choices(self._inds, weights=self.weights, k=size)
return [self._pop(i) for i in inds]
def __str__(self):
return (
"Cycle sampling object with current pool "
"(from most to least recently used):\n"
f"{self.pool}"
)
def count_period_repeats(x, periods=[1]):
x = np.asarray(x)
return {st: np.sum(x[:-st] == x[st:]) for st in periods}
x = list(range(10))
n_sample = 10000
n_recent = 3
# `n_recent` most recently drawn elements won't be drawn
not_n_recent_weights = [0] * n_recent + [1] * (len(x) - n_recent)
not_n_recent = CycleSample(x, not_n_recent_weights)
rand_draw_not_n_recent = np.array(not_n_recent.draw(n_sample))
print(np.bincount(rand_draw_not_n_recent))
print(count_period_repeats(rand_draw_not_n_recent, periods=range(1, 11)))
# `n_recent` most recently drawn elements won't be drawn AND the recent -
# following "human logic" (the least recently drawn element will have bigger
# probability to be chosen)
not_n_recent_human_weights = [0] * n_recent + list(range(1, len(x) - n_recent + 1))
not_n_recent_human = CycleSample(x, not_n_recent_human_weights)
rand_draw_not_n_recent_human = np.array(not_n_recent_human.draw(n_sample))
print(np.bincount(rand_draw_not_n_recent_human))
print(count_period_repeats(rand_draw_not_n_recent_human, periods=range(1, 11)))
#%% First experiments
from collections import Counter
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
class HumanRandom:
def __init__(self, x):
self.pool = list(np.random.permutation(x))
self.weights = np.arange(1, len(x) + 1)
self.weights = self.weights / np.sum(self.weights)
def draw(self, size):
inds = np.random.choice(np.arange(len(self.pool)), size=size, p=self.weights)
return [self.pop_to_front(i) for i in inds]
def pop_to_front(self, index):
res = self.pool.pop(index)
self.pool.insert(0, res)
return res
def __str__(self):
return f"Human Random with pool: {repr(self.pool)}"
# Tests for implementation correctness
x = np.arange(4)
humrand = HumanRandom(x)
str(humrand)
c = Counter()
for _ in range(10000):
pool_before = humrand.pool.copy()
res = humrand.draw(1)
c[pool_before.index(res)] += 1
## Should be approximately in proportion 1:2:3:4
dict(sorted(c.items(), key=lambda x: x[1]))
# Tests for draw properties
x = np.arange(20)
random_draw = np.random.choice(x, size=100000)
humrandom_draw = np.array(HumanRandom(x).draw(100000))
## "Human random" has less "empty spots" which indicates for "more random",
## i.e. less streaks without certain number
plt.plot(random_draw)
plt.plot(humrandom_draw)
print(f"Simple random bin count: {np.bincount(random_draw)}")
print(f"Human random bin count: {np.bincount(humrandom_draw)}")
## Average distances between consecutive draws of one element
def count_avg_dist(x, element):
return np.mean(np.diff((x == element).nonzero()))
print({el: count_avg_dist(random_draw, el) for el in x})
print({el: count_avg_dist(humrandom_draw, el) for el in x})
## Number of consecutive repeats
def count_steps_repeats(x, steps=[1]):
return {st: np.sum(x[:-st] == x[st:]) for st in steps}
## Using "human random" significantly reduces number of repeats with step less
## than number of initial elements
steps = np.arange(1, 21)
random_step_repeats = count_steps_repeats(random_draw, steps)
humrandom_step_repeats = count_steps_repeats(humrandom_draw, steps)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment