Skip to content

Instantly share code, notes, and snippets.

@outofmbufs
Created February 5, 2023 22:51
Show Gist options
  • Save outofmbufs/d259ba6bf1fdfe5c082e52abf48a9cdd to your computer and use it in GitHub Desktop.
Save outofmbufs/d259ba6bf1fdfe5c082e52abf48a9cdd to your computer and use it in GitHub Desktop.
small subset of NIST random number tests; see NIST 800-22; NOTE: requires scipy
import math
import random
import functools
import itertools
from collections import Counter
try:
from scipy.special import gammaincc
except ModuleNotFoundError:
def gammaincc(x1, x2):
raise TypeError("Requires gammaincc; scipy was not found")
# NIST Special Publication 800-22:
#
# https://csrc.nist.gov/publications/detail/sp/800-22/rev-1a/final
#
# defines statistical tests for randomness. This is an implementation of
# selected tests, done simply as an exercise. For any "real" purpose,
# there are better/faster/more-complete implementations; use them.
# Two known ones in python are:
# https://github.com/InsaneMonster/NistRng # pip install nistrng
# https://github.com/stevenang/randomness_testsuite
#
def streambits(stream, /, *, width, n=None):
"""Generate a sequence of 1's and 0's extracted from a stream of ints.
stream: iterable generating integer values.
width: the (implied) width in bits of each element of the stream.
n: if specified, a limit on how many stream elements to consume.
NOTE: bits generated LSb to MSb from each element in stream.
"""
masks = [1 << i for i in range(width)]
for i in itertools.islice(stream, n):
yield from (1 if (i & m) else 0 for m in masks)
# this decorator just handles automatically calculating n if it
# was not given. NOTE: of course, not giving n only works if the
# iterable being used is a sequence (i.e., has a len())
def auto_n(f):
@functools.wraps(f)
def _wrapped(it, *args, n=None, **kwargs):
if n is None:
n = len(it)
return f(it, *args, n=n, **kwargs)
return _wrapped
@auto_n
def frequency(it, /, *, n):
"""Perform Monobit test from NIST 800-22 on a sequence of 0's and 1's.
Section 2.1 in the NIST paper.
Returns a p value as defined in NIST 800-22, with p < 0.01 considered
as indicating the bits have failed the test.
n: length of bits; if defaulted len(bits) will be used.
"""
sn = sum((-1, 1)[i] for i in itertools.islice(it, n))
s_obs = abs(sn)/math.sqrt(n)
return math.erfc(s_obs/math.sqrt(2))
@auto_n
def blocks(it, /, *, n):
"""Batch data into tuples of length n. Throws away last if short."""
while len(batch := tuple(itertools.islice(it, n))) == n:
yield batch
@auto_n
def blockfrequency(it, M, /, *, n):
"""Perform test from NIST 800-22 / 2.2 on an iterable of 0's and 1's.
Returns a p value, with p < 0.01 considered indicating non-random data.
M: variable name chosen to match 800-22; size of subblocks to test.
n: total number of bits to use from it.
"""
# Using bx[i] for what NIST doc calls pi[i]
bx = [sum(t) / M for t in blocks(itertools.islice(it, n), n=M)]
chi2obs = 4 * M * sum((bi - 0.5) * (bi - 0.5) for bi in bx)
return gammaincc(len(bx) / 2, chi2obs / 2)
@auto_n
def runs(it, /, *, n):
"""Perform test 2.3 from NIST 800-22."""
# this test requires going through the values more than once so...
e = list(itertools.islice(it, n))
px = sum(e) / n # 800-22 calls this Pi
# per 800-22, must pass this precondition:
if abs(px - 0.5) >= (2 / math.sqrt(n)):
return 0.0
vn_obs = sum(0 if e[k] == e[k+1] else 1 for k in range(n-1)) + 1
pxx = px * (1 - px)
return math.erfc(abs(vn_obs - (2 * n * pxx)) /
(2 * math.sqrt(2 * n) * pxx))
@auto_n
def longestrunofones(it, /, *, n):
"""Perform test 2.4 from NIST 800-22."""
# As it says in section 3.4 of the NIST 800-22 paper:
# """Cases K=3, M=8; K=5, M=128; and K=6, M=10000
# are currently embedded in the test suite"""
# This code reflects those choices and the values precomputed
# for px[i] ("pi-sub-i" in the paper) were copy/pasted.
if n >= 750000:
M = 10000
vmin = 10
vmax = 16
capN = 75
px = [0.0882, 0.2092, 0.2483, 0.1933, 0.1208, 0.0675, 0.0727]
elif n >= 6272:
M = 128
vmin = 4
vmax = 9
capN = 49
px = [0.1174, 0.2430, 0.2493, 0.1752, 0.1027, 0.1124]
else:
M = 8
vmin = 1
vmax = 4
capN = 16
px = [0.2148, 0.3672, 0.2305, 0.1875]
# algorithm from 2.4.4 in 800-22:
# (1) Divide the sequence into M-bit blocks ('blocks()')
# (1b) but note that only take capN of them regardless of n.
# (2) tabulate frequences of longest runs of runs (see doc for details)
c = Counter()
for t in blocks(itertools.islice(it, capN*M), n=M):
# find the longest run of 1's in this tuple
longest = 0
runlen = 0
for v in itertools.chain(t, (0,)): # extra 0 closes any last run
if v == 1:
runlen += 1
else:
longest = max(runlen, longest)
runlen = 0
c[min(max(vmin, longest), vmax)] += 1
# (3) compute chi-squared-observed
chi2obs = sum(((c[vmin+i] - (capN*pxi))**2) / (capN * pxi)
for i, pxi in enumerate(px))
return gammaincc((len(px) - 1) / 2, chi2obs / 2)
def badgen(ef, /, *, width=32, badval=0):
"""A 'bad' random number generator.
Returns a fixed value (badval) one out of every ef calls.
Otherwise returns a random number of bit-width 'width'
"""
rrn = 1 << width
while True:
for _ in range(ef-1):
yield random.randrange(rrn)
yield badval
if __name__ == "__main__":
import unittest
class TestMethods(unittest.TestCase):
# test data from NIST 800-22
# DO NOT CHANGE; the expected_p values all depend on this data
NIST100 = [1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0]
NIST128 = [1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1,
1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0,
1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0]
@staticmethod
def equalNIST(x1, x2):
# compare within the 6-digits given in the NIST 800-22 paper
return round(x1, 6) == round(x2, 6)
def testfrequency(self):
expected_p = 0.109599 # from 800-22
p = frequency(self.NIST100)
with self.subTest(p=p, expected_p=expected_p):
self.assertTrue(self.equalNIST(expected_p, p))
def testblockfreq(self):
expected_p = 0.706438 # from 800-22
p = blockfrequency(self.NIST100, 10)
with self.subTest(p=p, expected_p=expected_p):
self.assertTrue(self.equalNIST(expected_p, p))
def testruns(self):
expected_p = 0.500798 # from 800-22
p = runs(self.NIST100)
with self.subTest(p=p, expected_p=expected_p):
self.assertTrue(self.equalNIST(expected_p, p))
def testlongestrun1(self):
expected_p = 0.180598 # from 800-22
p = longestrunofones(self.NIST128) # note 128 bits not 100
with self.subTest(p=p, expected_p=expected_p):
self.assertTrue(self.equalNIST(expected_p, p))
def testbad(self):
# Make some bad sequences from the NIST sequences, by forcing
# every other entry to be zero. Some of those might already be
# zero of course. NOTE - there is no guarantee that this
# sufficiently "corrupts" a good sequence to be bad for all
# possible tests. But empirically it works.
BAD128 = [a * b
for a, b in zip(itertools.cycle((1, 0)), self.NIST128)]
BADseq = BAD128[:100]
badplim = 0.01
self.assertTrue(frequency(BADseq) < badplim)
self.assertTrue(blockfrequency(BADseq, 10) < badplim)
self.assertTrue(runs(BADseq) < badplim)
self.assertTrue(longestrunofones(BAD128) < badplim)
unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment