Created
February 5, 2023 22:51
-
-
Save outofmbufs/d259ba6bf1fdfe5c082e52abf48a9cdd to your computer and use it in GitHub Desktop.
small subset of NIST random number tests; see NIST 800-22; NOTE: requires scipy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import random | |
import functools | |
import itertools | |
from collections import Counter | |
try: | |
from scipy.special import gammaincc | |
except ModuleNotFoundError: | |
def gammaincc(x1, x2): | |
raise TypeError("Requires gammaincc; scipy was not found") | |
# NIST Special Publication 800-22: | |
# | |
# https://csrc.nist.gov/publications/detail/sp/800-22/rev-1a/final | |
# | |
# defines statistical tests for randomness. This is an implementation of | |
# selected tests, done simply as an exercise. For any "real" purpose, | |
# there are better/faster/more-complete implementations; use them. | |
# Two known ones in python are: | |
# https://github.com/InsaneMonster/NistRng # pip install nistrng | |
# https://github.com/stevenang/randomness_testsuite | |
# | |
def streambits(stream, /, *, width, n=None): | |
"""Generate a sequence of 1's and 0's extracted from a stream of ints. | |
stream: iterable generating integer values. | |
width: the (implied) width in bits of each element of the stream. | |
n: if specified, a limit on how many stream elements to consume. | |
NOTE: bits generated LSb to MSb from each element in stream. | |
""" | |
masks = [1 << i for i in range(width)] | |
for i in itertools.islice(stream, n): | |
yield from (1 if (i & m) else 0 for m in masks) | |
# this decorator just handles automatically calculating n if it | |
# was not given. NOTE: of course, not giving n only works if the | |
# iterable being used is a sequence (i.e., has a len()) | |
def auto_n(f): | |
@functools.wraps(f) | |
def _wrapped(it, *args, n=None, **kwargs): | |
if n is None: | |
n = len(it) | |
return f(it, *args, n=n, **kwargs) | |
return _wrapped | |
@auto_n | |
def frequency(it, /, *, n): | |
"""Perform Monobit test from NIST 800-22 on a sequence of 0's and 1's. | |
Section 2.1 in the NIST paper. | |
Returns a p value as defined in NIST 800-22, with p < 0.01 considered | |
as indicating the bits have failed the test. | |
n: length of bits; if defaulted len(bits) will be used. | |
""" | |
sn = sum((-1, 1)[i] for i in itertools.islice(it, n)) | |
s_obs = abs(sn)/math.sqrt(n) | |
return math.erfc(s_obs/math.sqrt(2)) | |
@auto_n | |
def blocks(it, /, *, n): | |
"""Batch data into tuples of length n. Throws away last if short.""" | |
while len(batch := tuple(itertools.islice(it, n))) == n: | |
yield batch | |
@auto_n | |
def blockfrequency(it, M, /, *, n): | |
"""Perform test from NIST 800-22 / 2.2 on an iterable of 0's and 1's. | |
Returns a p value, with p < 0.01 considered indicating non-random data. | |
M: variable name chosen to match 800-22; size of subblocks to test. | |
n: total number of bits to use from it. | |
""" | |
# Using bx[i] for what NIST doc calls pi[i] | |
bx = [sum(t) / M for t in blocks(itertools.islice(it, n), n=M)] | |
chi2obs = 4 * M * sum((bi - 0.5) * (bi - 0.5) for bi in bx) | |
return gammaincc(len(bx) / 2, chi2obs / 2) | |
@auto_n | |
def runs(it, /, *, n): | |
"""Perform test 2.3 from NIST 800-22.""" | |
# this test requires going through the values more than once so... | |
e = list(itertools.islice(it, n)) | |
px = sum(e) / n # 800-22 calls this Pi | |
# per 800-22, must pass this precondition: | |
if abs(px - 0.5) >= (2 / math.sqrt(n)): | |
return 0.0 | |
vn_obs = sum(0 if e[k] == e[k+1] else 1 for k in range(n-1)) + 1 | |
pxx = px * (1 - px) | |
return math.erfc(abs(vn_obs - (2 * n * pxx)) / | |
(2 * math.sqrt(2 * n) * pxx)) | |
@auto_n | |
def longestrunofones(it, /, *, n): | |
"""Perform test 2.4 from NIST 800-22.""" | |
# As it says in section 3.4 of the NIST 800-22 paper: | |
# """Cases K=3, M=8; K=5, M=128; and K=6, M=10000 | |
# are currently embedded in the test suite""" | |
# This code reflects those choices and the values precomputed | |
# for px[i] ("pi-sub-i" in the paper) were copy/pasted. | |
if n >= 750000: | |
M = 10000 | |
vmin = 10 | |
vmax = 16 | |
capN = 75 | |
px = [0.0882, 0.2092, 0.2483, 0.1933, 0.1208, 0.0675, 0.0727] | |
elif n >= 6272: | |
M = 128 | |
vmin = 4 | |
vmax = 9 | |
capN = 49 | |
px = [0.1174, 0.2430, 0.2493, 0.1752, 0.1027, 0.1124] | |
else: | |
M = 8 | |
vmin = 1 | |
vmax = 4 | |
capN = 16 | |
px = [0.2148, 0.3672, 0.2305, 0.1875] | |
# algorithm from 2.4.4 in 800-22: | |
# (1) Divide the sequence into M-bit blocks ('blocks()') | |
# (1b) but note that only take capN of them regardless of n. | |
# (2) tabulate frequences of longest runs of runs (see doc for details) | |
c = Counter() | |
for t in blocks(itertools.islice(it, capN*M), n=M): | |
# find the longest run of 1's in this tuple | |
longest = 0 | |
runlen = 0 | |
for v in itertools.chain(t, (0,)): # extra 0 closes any last run | |
if v == 1: | |
runlen += 1 | |
else: | |
longest = max(runlen, longest) | |
runlen = 0 | |
c[min(max(vmin, longest), vmax)] += 1 | |
# (3) compute chi-squared-observed | |
chi2obs = sum(((c[vmin+i] - (capN*pxi))**2) / (capN * pxi) | |
for i, pxi in enumerate(px)) | |
return gammaincc((len(px) - 1) / 2, chi2obs / 2) | |
def badgen(ef, /, *, width=32, badval=0): | |
"""A 'bad' random number generator. | |
Returns a fixed value (badval) one out of every ef calls. | |
Otherwise returns a random number of bit-width 'width' | |
""" | |
rrn = 1 << width | |
while True: | |
for _ in range(ef-1): | |
yield random.randrange(rrn) | |
yield badval | |
if __name__ == "__main__": | |
import unittest | |
class TestMethods(unittest.TestCase): | |
# test data from NIST 800-22 | |
# DO NOT CHANGE; the expected_p values all depend on this data | |
NIST100 = [1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, | |
1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, | |
0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, | |
0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, | |
0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0] | |
NIST128 = [1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, | |
0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, | |
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, | |
0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, | |
0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, | |
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, | |
1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, | |
1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0] | |
@staticmethod | |
def equalNIST(x1, x2): | |
# compare within the 6-digits given in the NIST 800-22 paper | |
return round(x1, 6) == round(x2, 6) | |
def testfrequency(self): | |
expected_p = 0.109599 # from 800-22 | |
p = frequency(self.NIST100) | |
with self.subTest(p=p, expected_p=expected_p): | |
self.assertTrue(self.equalNIST(expected_p, p)) | |
def testblockfreq(self): | |
expected_p = 0.706438 # from 800-22 | |
p = blockfrequency(self.NIST100, 10) | |
with self.subTest(p=p, expected_p=expected_p): | |
self.assertTrue(self.equalNIST(expected_p, p)) | |
def testruns(self): | |
expected_p = 0.500798 # from 800-22 | |
p = runs(self.NIST100) | |
with self.subTest(p=p, expected_p=expected_p): | |
self.assertTrue(self.equalNIST(expected_p, p)) | |
def testlongestrun1(self): | |
expected_p = 0.180598 # from 800-22 | |
p = longestrunofones(self.NIST128) # note 128 bits not 100 | |
with self.subTest(p=p, expected_p=expected_p): | |
self.assertTrue(self.equalNIST(expected_p, p)) | |
def testbad(self): | |
# Make some bad sequences from the NIST sequences, by forcing | |
# every other entry to be zero. Some of those might already be | |
# zero of course. NOTE - there is no guarantee that this | |
# sufficiently "corrupts" a good sequence to be bad for all | |
# possible tests. But empirically it works. | |
BAD128 = [a * b | |
for a, b in zip(itertools.cycle((1, 0)), self.NIST128)] | |
BADseq = BAD128[:100] | |
badplim = 0.01 | |
self.assertTrue(frequency(BADseq) < badplim) | |
self.assertTrue(blockfrequency(BADseq, 10) < badplim) | |
self.assertTrue(runs(BADseq) < badplim) | |
self.assertTrue(longestrunofones(BAD128) < badplim) | |
unittest.main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment