Created
May 7, 2018 07:04
-
-
Save jfbu/31fd20749bb42848c068f1ef1bd60148 to your computer and use it in GitHub Desktop.
Comparing \pdfuniformdeviate 2 (i.e. "high bit") and Python's randrange(2) in batches of say 129 draws at a time. TeX's RNG seems to have statistically significantly more often more 1s than 0s.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env/py | |
""" | |
Dimanche 06 mai 2018 à 22:18:52 | |
Pour comparaison entre \pdfuniformdeviate 2 et randrange(2) | |
""" | |
import random | |
from scipy.stats import chisquare | |
# START OF pdftex.web TRANSLATION | |
# pdftex.web code translated in Python | |
# I should use a generator, not global variables | |
# but global variables enable easier checking if needed | |
# names as in pdftex.web | |
fraction_one = 2**28 | |
fraction_half = 2**27 | |
randoms = [0] * 55 | |
j_random = 0 | |
def next_random(): | |
global j_random | |
if j_random==0: | |
new_randoms() | |
else: | |
j_random -=1 | |
def new_randoms(): | |
global j_random | |
global randoms | |
for k in range(24): | |
randoms[k] = (randoms[k] - randoms[k+31]) % fraction_one | |
for k in range(24, 55): | |
randoms[k] = (randoms[k] - randoms[k-24]) % fraction_one | |
j_random = 54 | |
def init_randoms(seed): | |
global randoms | |
j = abs(seed) | |
while j >= fraction_one: | |
j //= 2 | |
k = 1 | |
for i in range(55): | |
jj = k | |
k = (j - k) % fraction_one | |
j = jj | |
randoms[(i * 21) % 55] = j | |
new_randoms() | |
new_randoms() | |
new_randoms() | |
def unif_rand(x): | |
global j_random | |
global randoms | |
next_random() | |
# here we must round, but Python // truncates, so we are in exact | |
# opposite as when we want to truncate in a \numexpr which rounds ;-) | |
y = ( abs(x) * randoms[j_random] + fraction_half ) // fraction_one | |
if y == abs(x): | |
return 0 | |
else: | |
if x > 0: | |
return y | |
else: | |
return -y | |
# END OF pdftex.web TRANSLATION | |
def test_highbit_tex(seed, reps, M, shift=54): | |
""" | |
In reps of successive batches of M \\uniformdeviate 2 | |
count if there are more 1s than 0s. Check if that happens | |
about half of the case or not. (M odd) | |
shift says how many \\uniformdeviate to do initially. | |
Brief testing (with reps = 100000) | |
seems to indicate that when M is big enough | |
(for example 129) | |
the deviation from statistical expectation is visible and | |
very often in the direction of more 1s than 0s. | |
""" | |
if not (M & 1): | |
print("Third argument %s should have been odd" % M) | |
return None | |
max_rand = 2**28 | |
print("TeX with seed %s and runs of length %s" % (seed, M)) | |
init_randoms(seed) | |
print("skipping first %s" % shift) | |
for k in range(shift): | |
x = unif_rand(max_rand) | |
D = {0:0, 1:0} | |
for n in range(0, reps): | |
# check how many 1s among the M first parity bits | |
S = 0 | |
for k in range(M): | |
S += ( unif_rand(2) << 1 ) - 1 # 2*x - 1 | |
# S = N_odd - N_even (N_odd + N_even = M is supposed odd, so no tie.) | |
if S > 0: | |
D[1] += 1 | |
else: | |
D[0] += 1 | |
# if n % 100 == 0: | |
# print("observed probability of more 1s after %s batches of %s is %s" % | |
# (n+1, M, D[1]/(D[0]+D[1]))) | |
# print(D) | |
print(D) | |
print("TeX: observed probability of more 1s after %s batches of %s is %s" % | |
(reps, M, D[1]/(D[0]+D[1]))) | |
return chisquare(list(D.values())) | |
def test_highbit_py(seed, reps, M): | |
""" | |
In reps of successive batches of M randrange(2) | |
count if there are more 1s than 0s. Check if that happens | |
about half of the case or not. | |
Seems to give quite better result of course than the RNG | |
of pdftex.web (say for reps=100000, M=129) but is also | |
much slower despite randrange() being built-in... | |
""" | |
if not (M & 1): | |
print("Third argument %s should have been odd" % M) | |
return None | |
print("Python with seed %s, runs of length %s, %s times" % (seed, M, reps)) | |
random.seed(seed) | |
D = {0:0, 1:0} | |
for n in range(0, reps): | |
# check how many 1s among the M first parity bits | |
S = 0 | |
for k in range(M): | |
S += ( random.randrange(2) << 1 ) - 1 # 2*x - 1 | |
# S = N_odd - N_even (N_odd + N_even = M is supposed odd, so no tie.) | |
if S > 0: | |
D[1] += 1 | |
else: | |
D[0] += 1 | |
# if n % 100 == 0: | |
# print("observed probability of more 1s after %s batches of %s is %s" % | |
# (n+1, M, D[1]/(D[0]+D[1]))) | |
# print(D) | |
print(D) | |
print("Python: observed probability of more 1s after %s batches of %s is %s" % | |
(reps, M, D[1]/(D[0]+D[1]))) | |
return chisquare(list(D.values())) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment