Last active
October 8, 2017 12:58
-
-
Save kdubovikov/7d7598ae918b92a784b6e0470f6702e9 to your computer and use it in GitHub Desktop.
Compare sampling algorithms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def collect_collisions(arr, sample_size, n_samples, | |
fast=False): | |
"""Collect total number of collisions made for each sample of arr. | |
Parameters | |
---------- | |
arr: np.array | |
array to sample from. | |
sample_size: int | |
sample size. | |
n_samples: int | |
number of samples to take. | |
fast: bool | |
use sampling optimized for fast consecutive samples | |
from the same array. | |
Returns | |
------- | |
collisions: np.array | |
collision number for each sample taken | |
""" | |
samples = collect_samples(arr, | |
sample_size, | |
n_samples, | |
fast=fast).flatten() | |
# count collisions for *all* numbers we have sampled | |
counts = Counter(samples) | |
collision_sum = sum([count for k, count in counts.items() if count > 1]) | |
return collision_sum | |
n = 10000 | |
arr = np.array([i for i in range(n)]) | |
# copy arrays so all experiments will be isolated in terms of array shuffling | |
arr_fast = arr.copy() | |
choice_num = [] | |
fast_num = [] | |
for i in tqdm(range(0, 500), desc="Collecting sample statistics"): | |
collisions = collect_collisions(arr, 1000, 10, fast=False) | |
collisions_fast = collect_collisions(arr_fast, 1000, 10, fast=True) | |
choice_num.append(collisions) | |
fast_num.append(collisions_fast) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment