Skip to content

Instantly share code, notes, and snippets.

@kdubovikov
Last active October 7, 2017 04:11
Show Gist options
  • Save kdubovikov/3fe69bb72e5db10e9311b91031342df9 to your computer and use it in GitHub Desktop.
Save kdubovikov/3fe69bb72e5db10e9311b91031342df9 to your computer and use it in GitHub Desktop.
Naive random sampling with numpy
import timeit
import numpy as np
from collections import Counter
def get_sample(arr, n_iter=None, sample_size=10, fast=True):
    """Draw a random sample without replacement from arr.

    Parameters
    ----------
    arr: np.array
        population to draw from.
    n_iter: int
        index of the current iteration; accepted but not used here.
    sample_size: int
        number of distinct elements to draw.
    fast: bool
        accepted but not used here.

    Returns
    -------
    sample: np.array
        sample_size elements of arr, each population position picked
        at most once.
    """
    # replace=False guarantees no position of arr is chosen twice.
    drawn = np.random.choice(arr, size=sample_size, replace=False)
    return drawn
def collect_samples(arr,
                    sample_size,
                    n_samples):
    """
    Collect several random samples from arr.

    Each sample is drawn independently via get_sample, so the same
    element may appear in more than one row.

    Parameters
    ----------
    arr: np.array
        array to sample from.
    sample_size: int
        number of elements in each sample.
    n_samples: int
        number of samples to take.

    Returns
    -------
    samples: np.ndarray
        sample matrix of shape (n_samples, sample_size); row i holds the
        i-th sample and the matrix has the same dtype as arr.
    """
    # Allocate exactly one row per sample (an extra row would stay as
    # meaningless filler) and keep arr's dtype so sampled values are
    # stored unchanged rather than cast to a narrower integer type.
    # Every row is overwritten below, so uninitialised memory is fine.
    samples = np.empty((n_samples, sample_size), dtype=np.asarray(arr).dtype)
    for sample_n in range(n_samples):
        samples[sample_n] = get_sample(arr,
                                       n_iter=sample_n,
                                       sample_size=sample_size)
    return samples
# Benchmark: time 10 runs of collecting 10 samples of 1000 elements each
# from a population of 3,000,000 consecutive integers.
n = 3000000
# np.arange builds the int64 population directly, without first
# materialising a 3M-element Python list and then copying it.
arr = np.arange(n, dtype=np.int64)
# The setup string imports from __main__, so this only works when the
# file is executed as the main script or in an interactive session.
# timeit.timeit returns the total seconds taken by all `number` runs.
timeit.timeit(stmt="collect_samples(arr, 1000, 10)", setup="from __main__ import collect_samples, arr", number=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment