Last active
October 7, 2017 04:11
-
-
Save kdubovikov/3fe69bb72e5db10e9311b91031342df9 to your computer and use it in GitHub Desktop.
Naive random sampling with numpy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import timeit | |
import numpy as np | |
from collections import Counter | |
def get_sample(arr, n_iter=None, sample_size=10,
               fast=True):
    """Get a random sample from arr, drawn without replacement.

    Parameters
    ----------
    arr: np.array
        array to sample from.
    n_iter: int
        current iteration number. Not used by this naive implementation;
        it is accepted so callers can pass it.
    sample_size: int
        number of elements to draw.
    fast: bool
        not used by this naive implementation.

    Returns
    -------
    sample: np.array
        sample from arr of length sample_size.
    """
    # replace=False: the same position of arr is never picked twice
    # within a single sample.
    return np.random.choice(arr, size=sample_size, replace=False)
def collect_samples(arr,
                    sample_size,
                    n_samples):
    """
    Collect several samples from arr.

    Parameters
    ----------
    arr: np.array
        array to sample from.
    sample_size: int
        sample size.
    n_samples: int
        number of samples to take.

    Returns
    -------
    samples: np.ndarray
        sample matrix of shape (n_samples, sample_size); row i holds the
        i-th sample. The matrix uses the dtype of arr so that sampled
        values are stored without truncation.
    """
    # Exactly one row per sample (no spare trailing row), and the buffer
    # takes the population's dtype instead of a fixed int32 so float or
    # wide-integer values are not silently cast on assignment.
    samples = np.empty((n_samples, sample_size),
                       dtype=np.asarray(arr).dtype)
    for sample_n in range(n_samples):
        samples[sample_n] = get_sample(arr,
                                       n_iter=sample_n,
                                       sample_size=sample_size)
    return samples
# Benchmark: time 10 runs of drawing 1000 samples of size 10 from a
# population of n consecutive integers.
n = 3000000
# np.arange builds the int64 population directly, without going through
# an intermediate Python list.
arr = np.arange(n, dtype=np.int64)
# The setup string imports from __main__, so this is meant to be run as a
# script or from an interactive session where these names live in __main__.
elapsed = timeit.timeit(stmt="collect_samples(arr, 1000, 10)",
                        setup="from __main__ import collect_samples, arr",
                        number=10)
# Report the total time in seconds; otherwise the measurement is lost when
# the file is run as a script.
print(elapsed)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment