Last active
May 5, 2018 20:08
-
-
Save jfbu/b14e09d7e8bcc16c394fd257799b0a0c to your computer and use it in GitHub Desktop.
Python code demonstrating biases in the parity bits of the PDFTeX RNG
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env/py | |
"""This is a quick Pythonification of the Pascal code in pdftex.web | |
I know I should use a Python generator rather than variables with | |
global scope... | |
Anyway, brief testing showed it does match exactly with actual | |
\pdfuniformedeviate/\pdfsetrandomseed, and that's what I wanted | |
in case I attempt some statistics on this PRNG. | |
May 2, 2018. JF B. | |
Added test_walk May 5, 2018. | |
Conclusion: in successive runs of 165 \pdfuniformdeviate | |
(starting with the 55th one after \pdfsetrandomseed) | |
it is more frequent that there are more odd numbers than even numbers. | |
""" | |
fraction_one = 2**28 | |
fraction_half = 2**27 | |
randoms = [0] * 55 | |
j_random = 0 | |
def next_random(): | |
global j_random | |
if j_random==0: | |
new_randoms() | |
else: | |
j_random -=1 | |
def new_randoms(): | |
global j_random | |
global randoms | |
for k in range(24): | |
randoms[k] = (randoms[k] - randoms[k+31]) % fraction_one | |
for k in range(24, 55): | |
randoms[k] = (randoms[k] - randoms[k-24]) % fraction_one | |
j_random = 54 | |
def init_randoms(seed): | |
global randoms | |
j = abs(seed) | |
while j >= fraction_one: | |
j //= 2 | |
k = 1 | |
for i in range(55): | |
jj = k | |
k = (j - k) % fraction_one | |
j = jj | |
randoms[(i * 21) % 55] = j | |
new_randoms() | |
new_randoms() | |
new_randoms() | |
def unif_rand(x): | |
global j_random | |
global randoms | |
next_random() | |
# here we must round, but Python // truncates, so we are in exact | |
# opposite as when we want to truncate in a \numexpr which rounds ;-) | |
y = ( abs(x) * randoms[j_random] + fraction_half ) // fraction_one | |
if y == abs(x): | |
return 0 | |
else: | |
if x > 0: | |
return y | |
else: | |
return -y | |
# all the above is quite direct Pythonification of code from pdftex.web | |
# it definitely should be using a generator ("yield") to be more Pythonesque... | |
# now let's start using it | |
# def test(seed, N, reps): | |
# """Create a stream of random integers which we can compare with similar | |
# one from tex code using \pdfuniformdeviate N | |
# I did it with 1000 draws and seeds 12345 and 12346 and match was perfect. | |
# """ | |
# init_randoms(seed) | |
# with open("randomints_py_" + str(seed) + ".txt", "w") as f: | |
# for k in range(reps): | |
# f.write(str(unif_rand(N)) + '\n') | |
from scipy.stats import chisquare | |
def test1(seed, reps): | |
"""Just for checking things do work as expected. | |
Helped me understand the shift by 1 at start.""" | |
global j_random | |
max_rand = 2**28 | |
print("seed = %s" % seed) | |
L = [0] * 110 | |
init_randoms(seed) | |
print(j_random) | |
for k in range(54): | |
x = unif_rand(max_rand) | |
print(j_random) | |
for i in range(55): | |
L[54-i] = unif_rand(max_rand) | |
for i in range(55): | |
L[55 + 54 - i] = unif_rand(max_rand) | |
occ = {0: 0, 1: 0} | |
for k in range(reps): | |
L[:55] = L[55:] | |
for i in range(109, 54, -1): | |
L[i] = unif_rand(max_rand) | |
for i in range(55, 110): | |
y = L[i] - L[i - 55] + L[i - 24] | |
x = y % 2 | |
# if y != 0: | |
# print(reps, i, y) | |
occ[x] +=1 | |
print(occ) | |
def test2(seed, R): | |
"""Checking I go recurrences right. | |
""" | |
print(seed) | |
max_rand = 2**28 | |
init_randoms(seed) | |
L = [0] # when we start j_random is 54 which means morally | |
# that an array item is already consumed, first random will be | |
# the one at randoms[53] | |
# so we put a dummy value in Oth position of L | |
# first random will be put in position 1 etc... | |
for k in range(1, R+1): | |
L.append(unif_rand(max_rand)) | |
# This is recursion using only past | |
# We check Z is zero modulo 2**28 always | |
for n in range(110, R+1-24): | |
u = n % 55 | |
if u < 7: | |
Z = L[n] - (L[n-7] - L[n-31]- L[n-38] + L[n-55]) | |
elif u < 31: | |
Z = L[n] - (-L[n-31] + L[n-55] + L[n-62]) | |
else: | |
Z = L[n] - (L[n-55] - L[n-86]) | |
# never happens... | |
if Z % max_rand: | |
print("error", n) | |
return None | |
def test_walk(seed, reps, M): | |
""" I initially was planning to mostly use it with 55*79 | |
but already 165 = 55*3 is interesting. | |
ATTENTION MUST BE USED WITH M ODD | |
""" | |
# anyway as we work mod 2, only two seeds: 0 and 1 ! | |
print("With seed %s and runs of %s trials" % (seed, M)) | |
max_rand = 2**28 | |
init_randoms(seed) | |
# shift to start | |
# this way batches of 55 randoms correspond to 55 values of the Fibonacci lagged sequence | |
print("skipping first 54") | |
for k in range(54): | |
x = unif_rand(max_rand) | |
L = [] | |
D = {0:0, 1:0} | |
for n in range(0, reps): | |
# check how many 1s among the M first parity bits | |
S = 0 | |
for k in range(M): | |
x = ((unif_rand(max_rand) & 1) << 1) - 1 # +1 if odd, -1 if even | |
S += x | |
# S = N_odd - N_even (N_odd + N_even = M is supposed odd, so no tie.) | |
if S > 0: | |
D[1] += 1 | |
else: | |
D[0] += 1 | |
if n % 100 == 0: | |
print("observed probability of more 1s after %s batches of %s is %s" % | |
(n+1, M, D[1]/(D[0]+D[1]))) | |
print(D) | |
print(D) | |
print("observed probability of more 1s after %s batches of %s is %s" % | |
(reps, M, D[1]/(D[0]+D[1]))) | |
return chisquare(list(D.values())) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment