Skip to content

Instantly share code, notes, and snippets.

@dbalabka
Created May 1, 2021 11:19
Show Gist options
  • Save dbalabka/439a76cd182338f64ad4269d77a4f693 to your computer and use it in GitHub Desktop.
Save dbalabka/439a76cd182338f64ad4269d77a4f693 to your computer and use it in GitHub Desktop.
Bootstrapping hypothesis testing of distribution equality using Efron's algorithm
from typing import Tuple
import numpy as np
import time
import numba
from scipy.stats import mannwhitneyu
@numba.njit(parallel=True, fastmath=True, nogil=True)
def compare_dist(z: np.ndarray, y: np.ndarray, n_samples: int = 10_000) -> Tuple[np.ndarray, float, float]:
    """Bootstrap test for a difference in means between two samples.

    Both samples are pooled, and for each of ``n_samples`` replicates a
    resample of size ``len(z) + len(y)`` is drawn with replacement from the
    pool. The first ``len(z)`` draws stand in for ``z`` and the remaining
    ``len(y)`` draws stand in for ``y``; the replicate statistic is the
    difference of their means.

    Args:
        z: First sample (1-D array).
        y: Second sample (1-D array).
        n_samples: Number of bootstrap replicates.

    Returns:
        A tuple ``(t, t_obs, p_value)`` where ``t`` holds the replicate
        statistics, ``t_obs`` is ``z.mean() - y.mean()`` on the original
        data, and ``p_value`` is the one-sided fraction of replicates with
        ``t >= t_obs``.
    """
    n = z.shape[0]
    m = y.shape[0]
    x = np.concatenate((z, y))
    t_obs = z.mean() - y.mean()
    t = np.zeros(n_samples)
    # Replicates are independent, so the loop is spread across threads.
    for i in numba.prange(n_samples):
        x_ = np.random.choice(x, n + m)
        # Split at n (not m): the first group gets n draws, the second the
        # remaining m draws, so the groups are disjoint and match the sizes
        # of z and y even when n != m.
        t[i] = x_[:n].mean() - x_[n:].mean()
    return t, t_obs, float(np.sum(np.greater_equal(t, t_obs)) / n_samples)
# Benchmark: time the bootstrap test against SciPy's Mann-Whitney U test on
# the same pair of synthetic samples and print each p-value and elapsed time.
# NOTE(review): Numba-compiled code is documented to keep its own RNG state,
# so this seed presumably only fixes z and y below, not the bootstrap draws
# inside compare_dist -- confirm.
np.random.seed(42)
size = 100_000
# Two normal samples with equal spread and a very small shift in the mean.
z = np.random.normal(0.00005, 0.5, size)
y = np.random.normal(0, 0.5, size)
n_samples = 10_000

# compare_dist is jitted without an explicit signature, so this first call
# also pays the compilation cost; the elapsed time below includes it.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock meant for measuring intervals; the clock is stopped
# before printing so console I/O is not part of the measurement.
start = time.perf_counter()
t = compare_dist(z, y, n_samples)
end = time.perf_counter()
print(f'p-value: {t[2]}')
print(end - start)

# NOTE(review): compare_dist reports a one-sided p-value (share of
# replicates >= observed); mannwhitneyu's default `alternative` depends on
# the SciPy version -- confirm the two p-values are comparable.
start = time.perf_counter()
t = mannwhitneyu(z, y)
end = time.perf_counter()
print(f'p-value: {t[1]}')
print(end - start)
# Numba debug
# bootstrap.compare_dist.parallel_diagnostics(level=4)
# bootstrap.compare_dist.inspect_types()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment