echasnovski · April 25, 2020 10:58
diff --git a/CycleSample.py b/CycleSample.py
 import random

 # Used only for exploration
 import numpy as np


 class CycleSample:
    def __init__(self, x, weights=None, preshuffle=True):
        self.pool = list(x)
        if preshuffle:
            random.shuffle(self.pool)

        self._n = len(x)
        self._inds = list(range(self._n))

        # For future, it can be useful for `weights` to be a function that is
        # applied to current pool before each draw. For example, this enables
        # usage "Don't sample recent `k` items and sample rest with probability
        # proportional to items' lengths". This will introduce significant
        # overhead, though.
        self.weights = weights

    def _pop(self, index):
        """Return item at index and place it at the beginning"""
        res = self.pool.pop(index)
        self.pool.insert(0, res)
        return res

    def draw(self, size=1):
        """Draw sample

        Draw sample based on the following algorithm:
        - Select position in the current pool, at which element will be drawn.
        - Pop this elment from the pool: extract it, put at the beginning of
          the pool, and return it.
        - Repeat.

        Parameters
        ----------
        size : int, optional
            Size of sample to draw, by default 1

        Returns
        -------
        draw : list
            List of drawn elements
        """
        inds = random.choices(self._inds, weights=self.weights, k=size)
        return [self._pop(i) for i in inds]

    def __str__(self):
        return (
            "Cycle sampling object with current pool "
            "(from most to least recently used):\n"
            f"{self.pool}"
        )


 def count_period_repeats(x, periods=[1]):
    x = np.asarray(x)
    return {st: np.sum(x[:-st] == x[st:]) for st in periods}


 x = list(range(10))
 n_sample = 10000
 n_recent = 3

 # `n_recent` most recently drawn elements won't be drawn
 not_n_recent_weights = [0] * n_recent + [1] * (len(x) - n_recent)
 not_n_recent = CycleSample(x, not_n_recent_weights)

 rand_draw_not_n_recent = np.array(not_n_recent.draw(n_sample))
 print(np.bincount(rand_draw_not_n_recent))
 print(count_period_repeats(rand_draw_not_n_recent, periods=range(1, 11)))

 # `n_recent` most recently drawn elements won't be drawn AND the recent -
 # following "human logic" (the least recently drawn element will have bigger
 # probability to be chosen)
 not_n_recent_human_weights = [0] * n_recent + list(range(1, len(x) - n_recent + 1))
 not_n_recent_human = CycleSample(x, not_n_recent_human_weights)

 rand_draw_not_n_recent_human = np.array(not_n_recent_human.draw(n_sample))
 print(np.bincount(rand_draw_not_n_recent_human))
 print(count_period_repeats(rand_draw_not_n_recent_human, periods=range(1, 11)))


 #%% First experiments
 from collections import Counter

 import numpy as np
 import matplotlib
 import matplotlib.pyplot as plt


 class HumanRandom:
    def __init__(self, x):
        self.pool = list(np.random.permutation(x))

        self.weights = np.arange(1, len(x) + 1)
        self.weights = self.weights / np.sum(self.weights)

    def draw(self, size):
        inds = np.random.choice(np.arange(len(self.pool)), size=size, p=self.weights)

        return [self.pop_to_front(i) for i in inds]

    def pop_to_front(self, index):
        res = self.pool.pop(index)
        self.pool.insert(0, res)
        return res

    def __str__(self):
        return f"Human Random with pool: {repr(self.pool)}"


 # Tests for implementation correctness
 x = np.arange(4)

 humrand = HumanRandom(x)
 str(humrand)

 c = Counter()
 for _ in range(10000):
    pool_before = humrand.pool.copy()
    res = humrand.draw(1)
    c[pool_before.index(res)] += 1

 ## Should be approximately in proportion 1:2:3:4
 dict(sorted(c.items(), key=lambda x: x[1]))

 # Tests for draw properties
 x = np.arange(20)
 random_draw = np.random.choice(x, size=100000)
 humrandom_draw = np.array(HumanRandom(x).draw(100000))

 ## "Human random" has less "empty spots" which indicates for "more random",
 ## i.e. less streaks without certain number
 plt.plot(random_draw)
 plt.plot(humrandom_draw)

 print(f"Simple random bin count: {np.bincount(random_draw)}")
 print(f"Human random bin count: {np.bincount(humrandom_draw)}")

 ## Average distances between consecutive draws of one element
 def count_avg_dist(x, element):
    return np.mean(np.diff((x == element).nonzero()))


 print({el: count_avg_dist(random_draw, el) for el in x})
 print({el: count_avg_dist(humrandom_draw, el) for el in x})

 ## Number of consecutive repeats
 def count_steps_repeats(x, steps=[1]):
    return {st: np.sum(x[:-st] == x[st:]) for st in steps}


 ## Using "human random" significantly reduces number of repeats with step less
 ## than number of initial elements
 steps = np.arange(1, 21)
 random_step_repeats = count_steps_repeats(random_draw, steps)
 humrandom_step_repeats = count_steps_repeats(humrandom_draw, steps)
	import random

	# Used only for exploration
	import numpy as np


	class CycleSample:
	def __init__(self, x, weights=None, preshuffle=True):
	self.pool = list(x)
	if preshuffle:
	random.shuffle(self.pool)

	self._n = len(x)
	self._inds = list(range(self._n))

	# For future, it can be useful for `weights` to be a function that is
	# applied to current pool before each draw. For example, this enables
	# usage "Don't sample recent `k` items and sample rest with probability
	# proportional to items' lengths". This will introduce significant
	# overhead, though.
	self.weights = weights

	def _pop(self, index):
	"""Return item at index and place it at the beginning"""
	res = self.pool.pop(index)
	self.pool.insert(0, res)
	return res

	def draw(self, size=1):
	"""Draw sample

	Draw sample based on the following algorithm:
	- Select position in the current pool, at which element will be drawn.
	- Pop this elment from the pool: extract it, put at the beginning of
	the pool, and return it.
	- Repeat.

	Parameters
	----------
	size : int, optional
	Size of sample to draw, by default 1

	Returns
	-------
	draw : list
	List of drawn elements
	"""
	inds = random.choices(self._inds, weights=self.weights, k=size)
	return [self._pop(i) for i in inds]

	def __str__(self):
	return (
	"Cycle sampling object with current pool "
	"(from most to least recently used):\n"
	f"{self.pool}"
	)


	def count_period_repeats(x, periods=[1]):
	x = np.asarray(x)
	return {st: np.sum(x[:-st] == x[st:]) for st in periods}


	x = list(range(10))
	n_sample = 10000
	n_recent = 3

	# `n_recent` most recently drawn elements won't be drawn
	not_n_recent_weights = [0] * n_recent + [1] * (len(x) - n_recent)
	not_n_recent = CycleSample(x, not_n_recent_weights)

	rand_draw_not_n_recent = np.array(not_n_recent.draw(n_sample))
	print(np.bincount(rand_draw_not_n_recent))
	print(count_period_repeats(rand_draw_not_n_recent, periods=range(1, 11)))

	# `n_recent` most recently drawn elements won't be drawn AND the recent -
	# following "human logic" (the least recently drawn element will have bigger
	# probability to be chosen)
	not_n_recent_human_weights = [0] * n_recent + list(range(1, len(x) - n_recent + 1))
	not_n_recent_human = CycleSample(x, not_n_recent_human_weights)

	rand_draw_not_n_recent_human = np.array(not_n_recent_human.draw(n_sample))
	print(np.bincount(rand_draw_not_n_recent_human))
	print(count_period_repeats(rand_draw_not_n_recent_human, periods=range(1, 11)))


	#%% First experiments
	from collections import Counter

	import numpy as np
	import matplotlib
	import matplotlib.pyplot as plt


	class HumanRandom:
	def __init__(self, x):
	self.pool = list(np.random.permutation(x))

	self.weights = np.arange(1, len(x) + 1)
	self.weights = self.weights / np.sum(self.weights)

	def draw(self, size):
	inds = np.random.choice(np.arange(len(self.pool)), size=size, p=self.weights)

	return [self.pop_to_front(i) for i in inds]

	def pop_to_front(self, index):
	res = self.pool.pop(index)
	self.pool.insert(0, res)
	return res

	def __str__(self):
	return f"Human Random with pool: {repr(self.pool)}"


	# Tests for implementation correctness
	x = np.arange(4)

	humrand = HumanRandom(x)
	str(humrand)

	c = Counter()
	for _ in range(10000):
	pool_before = humrand.pool.copy()
	res = humrand.draw(1)
	c[pool_before.index(res)] += 1

	## Should be approximately in proportion 1:2:3:4
	dict(sorted(c.items(), key=lambda x: x[1]))

	# Tests for draw properties
	x = np.arange(20)
	random_draw = np.random.choice(x, size=100000)
	humrandom_draw = np.array(HumanRandom(x).draw(100000))

	## "Human random" has less "empty spots" which indicates for "more random",
	## i.e. less streaks without certain number
	plt.plot(random_draw)
	plt.plot(humrandom_draw)

	print(f"Simple random bin count: {np.bincount(random_draw)}")
	print(f"Human random bin count: {np.bincount(humrandom_draw)}")

	## Average distances between consecutive draws of one element
	def count_avg_dist(x, element):
	return np.mean(np.diff((x == element).nonzero()))


	print({el: count_avg_dist(random_draw, el) for el in x})
	print({el: count_avg_dist(humrandom_draw, el) for el in x})

	## Number of consecutive repeats
	def count_steps_repeats(x, steps=[1]):
	return {st: np.sum(x[:-st] == x[st:]) for st in steps}


	## Using "human random" significantly reduces number of repeats with step less
	## than number of initial elements
	steps = np.arange(1, 21)
	random_step_repeats = count_steps_repeats(random_draw, steps)
	humrandom_step_repeats = count_steps_repeats(humrandom_draw, steps)