Marc-B-Reynolds · April 6, 2025 12:27
diff --git a/xoroshiro128p_fail.c b/xoroshiro128p_fail.c
 // compile with whatever then run PractRand:
 //  ./test | RNG_test stdin64 -tlmin 256KB -tf 2 -tlmax 512GB -seed 0

 //****************************************************************************
 // verbatim from: https://prng.di.unimi.it/xoroshiro128plus.c

 /*  Written in 2016-2018 by David Blackman and Sebastiano Vigna ([email protected])

 To the extent possible under law, the author has dedicated all copyright
 and related and neighboring rights to this software to the public domain
 worldwide.

 Permission to use, copy, modify, and/or distribute this software for any
 purpose with or without fee is hereby granted.

 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
 IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

 #include <stdint.h>

 /* This is xoroshiro128+ 1.0, our best and fastest small-state generator
   for floating-point numbers, but its state space is large enough only
   for mild parallelism. We suggest to use its upper bits for
   floating-point generation, as it is slightly faster than
   xoroshiro128++/xoroshiro128**. It passes all tests we are aware of
   except for the four lower bits, which might fail linearity tests (and
   just those), so if low linear complexity is not considered an issue (as
   it is usually the case) it can be used to generate 64-bit outputs, too;
   moreover, this generator has a very mild Hamming-weight dependency
   making our test (http://prng.di.unimi.it/hwd.php) fail after 5 TB of
   output; we believe this slight bias cannot affect any application. If
   you are concerned, use xoroshiro128++, xoroshiro128** or xoshiro256+.

   We suggest to use a sign test to extract a random Boolean value, and
   right shifts to extract subsets of bits.

   The state must be seeded so that it is not everywhere zero. If you have
   a 64-bit seed, we suggest to seed a splitmix64 generator and use its
   output to fill s. 

   NOTE: the parameters (a=24, b=16, c=37) of this version give slightly
   better results in our test than the 2016 version (a=55, b=14, c=36).
 */

 /* Rotate x left by k bit positions within 64 bits.
  *
  * The shift counts are reduced modulo 64 so that k == 0 (or any multiple
  * of 64) never produces a 64-bit shift, which is undefined behavior in C.
  * For 1 <= k <= 63 the result is identical to the upstream expression
  * (x << k) | (x >> (64 - k)), so generator output is unchanged. */
 static inline uint64_t rotl(const uint64_t x, int k) {
 	const unsigned left = (unsigned)k & 63u;
 	const unsigned right = (64u - left) & 63u;

 	return (x << left) | (x >> right);
 }


 /* Generator state: the two 64-bit words that next() reads and updates.
    It must not be all zero when the generator is used. */
 static uint64_t s[2];

 uint64_t next(void) {
 	const uint64_t s0 = s[0];
 	uint64_t s1 = s[1];
 	const uint64_t result = s0 + s1;

 	s1 ^= s0;
 	s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
 	s[1] = rotl(s1, 37); // c

 	return result;
 }


 /* Jump function for the generator. Calling it once is equivalent to
    2^64 calls to next(), so it can be used to carve out 2^64
    non-overlapping subsequences for parallel computations. */

 void jump(void) {
 	static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc };
 	const uint64_t *const end = JUMP + sizeof JUMP / sizeof JUMP[0];

 	uint64_t acc0 = 0;
 	uint64_t acc1 = 0;

 	/* Walk every bit of the jump constant, low bit first. Whenever the
 	   bit is set, fold the current state into the accumulators; the
 	   generator is stepped once per bit regardless. */
 	for (const uint64_t *word = JUMP; word != end; word++) {
 		for (int bit = 0; bit < 64; bit++) {
 			if ((*word >> bit) & 1) {
 				acc0 ^= s[0];
 				acc1 ^= s[1];
 			}
 			next();
 		}
 	}

 	s[0] = acc0;
 	s[1] = acc1;
 }


 /* Long-jump function for the generator. Calling it once is equivalent
    to 2^96 calls to next(); it can be used to produce 2^32 starting
    points, from each of which jump() yields 2^32 non-overlapping
    subsequences for parallel distributed computations. */

 void long_jump(void) {
 	static const uint64_t LONG_JUMP[] = { 0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1 };
 	const uint64_t *const end = LONG_JUMP + sizeof LONG_JUMP / sizeof LONG_JUMP[0];

 	uint64_t acc0 = 0;
 	uint64_t acc1 = 0;

 	/* Walk every bit of the long-jump constant, low bit first. Whenever
 	   the bit is set, fold the current state into the accumulators; the
 	   generator is stepped once per bit regardless. */
 	for (const uint64_t *word = LONG_JUMP; word != end; word++) {
 		for (int bit = 0; bit < 64; bit++) {
 			if ((*word >> bit) & 1) {
 				acc0 ^= s[0];
 				acc1 ^= s[1];
 			}
 			next();
 		}
 	}

 	s[0] = acc0;
 	s[1] = acc1;
 }

 //*****************************************************************************
 // driver below here

 #include <stddef.h>
 #include <stdio.h>

 #if defined(_WIN32)
 #include <fcntl.h>  // _O_BINARY
 #include <io.h>     // _setmode
 #endif

 /* Driver: seed the generator, optionally jump ahead, then stream its
    64-bit outputs to stdout as raw bytes, 1024 words per write, for
    consumption by an external test program. The loop only ends when a
    write comes up short, in which case -1 is returned. */
 int main(void)
 {
   // number of jump() calls applied to the seeded state before streaming
   const uint32_t dist = 0;

   // the state must have at least one bit set, so seed it with non-zero
   // constants. each jump() is equivalent to 2^64 calls to next(), so the
   // stream starts dist * 2^64 steps past the seed.
   s[0] = 0xdeadbeef;
   s[1] = 0x12345fe1;

   for(uint32_t i=0; i<dist; i++)
     jump();

   // Windows: put stdout into binary mode so the words are written without
   // text-mode translation (_setmode needs <io.h>, _O_BINARY <fcntl.h>).
   // A text-mode stream would alter the bytes under test, so failing to
   // switch is treated as fatal.
 #if defined(_WIN32)
   if(_setmode(_fileno(stdout), _O_BINARY)==-1) {
     fprintf(stderr, "ERROR: _setmode() on stdout failed!\n");
     fflush(stderr);
     return -1;
   }
 #endif

   uint64_t buffer[1024];
   const size_t count = sizeof(buffer)/sizeof(buffer[0]);

   for(;;) {
     for(size_t i=0; i<count; i++) {
       buffer[i] = next();
     }

     // a short write means stdout can no longer take data: report and stop
     if (fwrite(buffer, sizeof(buffer[0]), count, stdout) != count) {
       fprintf(stderr, "something's rotten in Denmark.");
       return -1;
     }
   }
 }
	// compile with whatever then run PractRand:
	// ./test | RNG_test stdin64 -tlmin 256KB -tf 2 -tlmax 512GB -seed 0

	//****************************************************************************
	// verbatim from: https://prng.di.unimi.it/xoroshiro128plus.c

	/* Written in 2016-2018 by David Blackman and Sebastiano Vigna ([email protected])

	To the extent possible under law, the author has dedicated all copyright
	and related and neighboring rights to this software to the public domain
	worldwide.

	Permission to use, copy, modify, and/or distribute this software for any
	purpose with or without fee is hereby granted.

	THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
	WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
	MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
	ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
	WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
	ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
	IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

	#include <stdint.h>

	/* This is xoroshiro128+ 1.0, our best and fastest small-state generator
	for floating-point numbers, but its state space is large enough only
	for mild parallelism. We suggest to use its upper bits for
	floating-point generation, as it is slightly faster than
	xoroshiro128++/xoroshiro128**. It passes all tests we are aware of
	except for the four lower bits, which might fail linearity tests (and
	just those), so if low linear complexity is not considered an issue (as
	it is usually the case) it can be used to generate 64-bit outputs, too;
	moreover, this generator has a very mild Hamming-weight dependency
	making our test (http://prng.di.unimi.it/hwd.php) fail after 5 TB of
	output; we believe this slight bias cannot affect any application. If
	you are concerned, use xoroshiro128++, xoroshiro128** or xoshiro256+.

	We suggest to use a sign test to extract a random Boolean value, and
	right shifts to extract subsets of bits.

	The state must be seeded so that it is not everywhere zero. If you have
	a 64-bit seed, we suggest to seed a splitmix64 generator and use its
	output to fill s.

	NOTE: the parameters (a=24, b=16, c=37) of this version give slightly
	better results in our test than the 2016 version (a=55, b=14, c=36).
	*/

	/* Rotate x left by k bit positions within 64 bits.
	 *
	 * The shift counts are reduced modulo 64 so that k == 0 (or any multiple
	 * of 64) never produces a 64-bit shift, which is undefined behavior in C.
	 * For 1 <= k <= 63 the result equals (x << k) | (x >> (64 - k)). */
	static inline uint64_t rotl(const uint64_t x, int k) {
		const unsigned left = (unsigned)k & 63u;
		const unsigned right = (64u - left) & 63u;

		return (x << left) | (x >> right);
	}


	/* Generator state: the two 64-bit words that next() reads and updates.
	   It must not be all zero when the generator is used. */
	static uint64_t s[2];

	uint64_t next(void) {
	const uint64_t s0 = s[0];
	uint64_t s1 = s[1];
	const uint64_t result = s0 + s1;

	s1 ^= s0;
	s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
	s[1] = rotl(s1, 37); // c

	return result;
	}


	/* Jump function for the generator. Calling it once is equivalent to
	   2^64 calls to next(), so it can be used to carve out 2^64
	   non-overlapping subsequences for parallel computations. */

	void jump(void) {
		static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc };
		const uint64_t *const end = JUMP + sizeof JUMP / sizeof JUMP[0];

		uint64_t acc0 = 0;
		uint64_t acc1 = 0;

		/* Walk every bit of the jump constant, low bit first. Whenever the
		   bit is set, fold the current state into the accumulators; the
		   generator is stepped once per bit regardless. */
		for (const uint64_t *word = JUMP; word != end; word++) {
			for (int bit = 0; bit < 64; bit++) {
				if ((*word >> bit) & 1) {
					acc0 ^= s[0];
					acc1 ^= s[1];
				}
				next();
			}
		}

		s[0] = acc0;
		s[1] = acc1;
	}


	/* Long-jump function for the generator. Calling it once is equivalent
	   to 2^96 calls to next(); it can be used to produce 2^32 starting
	   points, from each of which jump() yields 2^32 non-overlapping
	   subsequences for parallel distributed computations. */

	void long_jump(void) {
		static const uint64_t LONG_JUMP[] = { 0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1 };
		const uint64_t *const end = LONG_JUMP + sizeof LONG_JUMP / sizeof LONG_JUMP[0];

		uint64_t acc0 = 0;
		uint64_t acc1 = 0;

		/* Walk every bit of the long-jump constant, low bit first. Whenever
		   the bit is set, fold the current state into the accumulators; the
		   generator is stepped once per bit regardless. */
		for (const uint64_t *word = LONG_JUMP; word != end; word++) {
			for (int bit = 0; bit < 64; bit++) {
				if ((*word >> bit) & 1) {
					acc0 ^= s[0];
					acc1 ^= s[1];
				}
				next();
			}
		}

		s[0] = acc0;
		s[1] = acc1;
	}

	//*****************************************************************************
	// driver below here

	#include <stddef.h>
	#include <stdio.h>

	/* Driver: seed the generator, optionally jump ahead, then stream its
	   64-bit outputs to stdout as raw bytes, 1024 words per write, for
	   consumption by an external test program. The loop only ends when a
	   write comes up short, in which case -1 is returned. */
	int main(void)
	{
		// number of jump() calls applied to the seeded state before streaming
		const uint32_t dist = 0;

		// the state must have at least one bit set, so seed it with non-zero
		// constants. each jump() is equivalent to 2^64 calls to next(), so the
		// stream starts dist * 2^64 steps past the seed.
		s[0] = 0xdeadbeef;
		s[1] = 0x12345fe1;

		for(uint32_t i=0; i<dist; i++)
			jump();

		// Windows: put stdout into binary mode so the words are written without
		// text-mode translation (_setmode needs <io.h>, _O_BINARY <fcntl.h>).
		// A text-mode stream would alter the bytes under test, so failing to
		// switch is treated as fatal.
	#if defined(_WIN32)
		if(_setmode(_fileno(stdout), _O_BINARY)==-1) {
			fprintf(stderr, "ERROR: _setmode() on stdout failed!\n");
			fflush(stderr);
			return -1;
		}
	#endif

		uint64_t buffer[1024];
		const size_t count = sizeof(buffer)/sizeof(buffer[0]);

		for(;;) {
			for(size_t i=0; i<count; i++) {
				buffer[i] = next();
			}

			// a short write means stdout can no longer take data: report and stop
			if (fwrite(buffer, sizeof(buffer[0]), count, stdout) != count) {
				fprintf(stderr, "something's rotten in Denmark.");
				return -1;
			}
		}
	}