Skip to content

Instantly share code, notes, and snippets.

@dtlanghoff
Last active August 29, 2015 13:56
Show Gist options
  • Save dtlanghoff/9215464 to your computer and use it in GitHub Desktop.
Save dtlanghoff/9215464 to your computer and use it in GitHub Desktop.
Empirical randomness testing
import numpy as np
import scipy.stats
from scipy.misc import factorial
from sklearn.utils.extmath import cartesian
def equidistribution_test(Y, d):
observed = np.bincount(seq, minlength=d)
expected = len(seq) * np.ones(d) / d
return 1 - scipy.stats.chi2.cdf(np.sum((observed-expected)**2 / expected), d - 1)
def serial_test(seq, r):
if seq.shape[0] % 2 != 0:
seq = seq[:-1]
observed = np.bincount(r*seq[0::2] + seq[1::2], minlength=r**2)
expected = (len(seq)/2) * np.ones(r ** 2) / (r ** 2)
return 1 - scipy.stats.chi2.cdf(np.sum((observed-expected)**2 / expected), r**2 - 1)
def binom(n, k):
i = np.arange(k, dtype=np.float) + 1
return np.product((n-(k-i))/i)
def stirling(n, k):
return np.sum((-1)**(k-j)*binom(k, j)*j**n for j in xrange(k + 1)) / factorial(k)
def poker_test_map(r):
return np.apply_along_axis(lambda x: np.unique(x).shape[0], 1, cartesian([np.arange(r)]*5)).astype(np.uint8)
poker_test_memo = {}
def poker_test(seq, d):
global poker_test_memo
m = poker_test_memo[d] if d in poker_test_memo else poker_test_memo.setdefault(d, poker_test_map(d))
seq = seq.reshape(len(seq) / 5, 5)
seq = np.sum(seq[...,i] * d**(4-i) for i in range(5))
observed = np.bincount(m[seq], minlength=6)[1:]
expected = len(seq) * np.fromiter((factorial(d) / factorial(d-r) / d**5 * stirling(5, r) for r in np.arange(5)+1), dtype=np.float)
return 1 - scipy.stats.chi2.cdf(np.sum((observed-expected)**2 / expected), 4)
# Knuth, Donald Ervin. 1997. The art of computer programming. Vol. 2 : Seminumerical algorithms, pp. 61--62. 3rd ed. Reading, Mass.: Addison-Wesley.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment