Last active
March 5, 2025 22:06
-
-
Save mmozeiko/1561361cd4105749f80bb0b9223e9db8 to your computer and use it in GitHub Desktop.
simple standalone C headers for PCG random number generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <stdint.h> | |
// pcg32_next() implementation matches pcg_engines::oneseq_xsh_rr_64_32 | |
// NOTE: use this only for compatibility with 32-bit architectures | |
// otherwise prefer using pcg64.h implementation with 128-bit state | |
typedef struct { | |
uint64_t state; | |
} pcg32; | |
#if defined(__clang__) | |
# define PCG32_ROR(r, c) __builtin_rotateright32((r), (c)) | |
#elif defined(_MSC_VER) | |
# include <stdlib.h> | |
# define PCG32_ROR(r, c) _rotr((r), (c)) | |
#else | |
# define PCG32_ROR(r, c) ( ((r) >> (c)) | ((r) << (-(c)&31)) ) | |
#endif | |
#define PCG32_DEFAULT_MULTIPLIER 6364136223846793005ULL | |
#define PCG32_DEFAULT_INCREMENT 1442695040888963407ULL | |
static inline uint32_t pcg32_next(pcg32* rng) | |
{ | |
uint64_t state = rng->state; | |
rng->state = state * PCG32_DEFAULT_MULTIPLIER + PCG32_DEFAULT_INCREMENT; | |
// XSH-RR | |
uint32_t value = (uint32_t)((state ^ (state >> 18)) >> 27); | |
int rot = state >> 59; | |
return PCG32_ROR(value, rot); | |
} | |
static inline uint64_t pcg32_next64(pcg32* rng) | |
{ | |
uint64_t value = pcg32_next(rng); | |
value <<= 32; | |
value |= pcg32_next(rng); | |
return value; | |
} | |
// returns value in [low,high) interval | |
static inline uint32_t pcg32_range(pcg32* rng, uint32_t low, uint32_t high) | |
{ | |
uint32_t bound = high - low; | |
// Fast Random Integer Generation in an Interval: https://arxiv.org/abs/1805.10941 | |
uint64_t m = (uint64_t)pcg32_next(rng) * (uint64_t)bound; | |
uint32_t l = (uint32_t)m; | |
if (l < bound) | |
{ | |
uint32_t t = -(int32_t)bound % bound; | |
while (l < t) | |
{ | |
m = (uint64_t)pcg32_next(rng) * (uint64_t)bound; | |
l = (uint32_t)m; | |
} | |
} | |
return low + (uint32_t)(m >> 32); | |
} | |
// returns float & double in [0,1) interval | |
static inline float pcg32_nextf(pcg32* rng) | |
{ | |
uint32_t x = pcg32_next(rng); | |
return (float)(int32_t)(x >> 8) * 0x1.0p-24f; | |
} | |
static inline double pcg32_nextd(pcg32* rng) | |
{ | |
uint64_t x = pcg32_next64(rng); | |
return (double)(int64_t)(x >> 11) * 0x1.0p-53; | |
} | |
static inline void pcg32_seed(pcg32* rng, uint64_t seed) | |
{ | |
rng->state = 0ULL; | |
pcg32_next(rng); | |
rng->state += seed; | |
pcg32_next(rng); | |
} | |
static inline void pcg32_advance(pcg32* rng, uint64_t delta) | |
{ | |
uint64_t cur_mult = PCG32_DEFAULT_MULTIPLIER; | |
uint64_t cur_plus = PCG32_DEFAULT_INCREMENT; | |
uint64_t acc_mult = 1; | |
uint64_t acc_plus = 0; | |
while (delta != 0) | |
{ | |
if (delta & 1) | |
{ | |
acc_mult *= cur_mult; | |
acc_plus = acc_plus * cur_mult + cur_plus; | |
} | |
cur_plus = (cur_mult + 1) * cur_plus; | |
cur_mult *= cur_mult; | |
delta >>= 1; | |
} | |
rng->state = acc_mult * rng->state + acc_plus; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <stdint.h> | |
// this implementation matches pcg_engines::cm_oneseq_dxsm_128_64 | |
// NOTE: you should use this on 64-bit architectures only. While it will work on 32-bit and will | |
// give exact same numbers, it will be slower. You should use this when you need to have 128-bit | |
// state on 32-bit architecture. If all you need is 64-bit random, use pcg32_next64 from pcg32.h | |
#if defined(__SIZEOF_INT128__) && !defined(__wasm__) | |
typedef unsigned __int128 pcg64_uint128; | |
#define PCG64_INIT(hi, lo) (((pcg64_uint128)(hi)) << 64) + (lo) | |
#define PCG64_IS_ZERO(r) ((r) == (pcg64_uint128)0) | |
#define PCG64_ZERO(r) (r) = (pcg64_uint128)0 | |
#define PCG64_COPY(r, x) (r) = (x) | |
#define PCG64_LO(r) (uint64_t)(r) | |
#define PCG64_HI(r) (uint64_t)((r) >> 64) | |
#define PCG64_SHR(r, c) (r) >>= (c) | |
#define PCG64_ADD(r, x) (r) += (x) | |
#define PCG64_MUL(r, x) (r) *= (x) | |
#else | |
typedef uint64_t pcg64_uint128[2]; | |
#define PCG64_INIT(hi, lo) { (lo), (hi) } | |
#define PCG64_IS_ZERO(r) (((r)[0] | (r)[1]) == 0) | |
#define PCG64_ZERO(r) (r)[0] = (r)[1] = 0 | |
#define PCG64_LO(r) (r)[0] | |
#define PCG64_HI(r) (r)[1] | |
#if defined(_MSC_VER) && defined(_M_AMD64) | |
#include <intrin.h> | |
// __assume(x) is here to prevent MSVC using SSE registers instead of just a pair of GPR's | |
// see the codegen differences between functions here: https://godbolt.org/z/rT8MscbPh | |
#define PCG64_COPY(r, x) (r)[0] = (x)[0], __assume(x), (r)[1] = (x)[1] | |
#define PCG64_SHR(r, c) (r)[0] = __shiftright128((r)[0], (r)[1], (c)), (r)[1] >>= (c) | |
#define PCG64_ADD(r, x) _addcarry_u64(_addcarry_u64(0, (r)[0], (x)[0], &(r)[0]), (r)[1], (x)[1], &(r)[1]) | |
#define PCG64_MUL(r, x) \ | |
{ \ | |
uint64_t _temp = (r)[0] * (x)[1] + (r)[1] * (x)[0]; \ | |
(r)[0] = _umul128((r)[0], (x)[0], &(r)[1]); \ | |
(r)[1] += _temp; \ | |
} | |
#elif defined(_MSC_VER) && defined(_M_ARM64) | |
#include <intrin.h> | |
#define PCG64_COPY(r, x) (r)[0] = (x)[0], (r)[1] = (x)[1] | |
#define PCG64_SHR(r, c) (r)[0] = (((r)[0] >> (c)) | ((r)[1] << (64 - (c)))), (r)[1] >>= (c) | |
#define PCG64_ADD(r, x) (r)[0] += (x)[0], (r)[1] += (x)[1] + ((r)[0] < (x)[0]) | |
#define PCG64_MUL(r, x) \ | |
{ \ | |
(r)[1] = __umulh((r)[0], (x)[0]) + (r)[0] * (x)[1] + (r)[1] * (x)[0]; \ | |
(r)[0] *= (x)[0]; \ | |
} | |
#else | |
#define PCG64_COPY(r, x) (r)[0] = (x)[0], (r)[1] = (x)[1] | |
#define PCG64_SHR(r, c) (r)[0] = (((r)[0] >> (c)) | ((r)[1] << (64 - (c)))), (r)[1] >>= (c) | |
#define PCG64_ADD(r, x) (r)[0] += (x)[0], (r)[1] += (x)[1] + ((r)[0] < (x)[0]) | |
#define PCG64_MUL(r, x) \ | |
{ \ | |
uint64_t _a = (r)[0]; \ | |
uint64_t _b = (x)[0]; \ | |
uint64_t _ll = (_a & 0xFFFFFFFF) * (_b & 0xFFFFFFFF); \ | |
uint64_t _hl = (_a >> 32) * (_b & 0xFFFFFFFF); \ | |
uint64_t _lh = (_a & 0xFFFFFFFF) * (_b >> 32); \ | |
uint64_t _hh = (_a >> 32) * (_b >> 32); \ | |
uint64_t _mid = (_ll >> 32) + (_hl & 0xFFFFFFFF) + _lh; \ | |
uint64_t _top = (_hl >> 32) + (_mid >> 32) + _hh; \ | |
(r)[1] = _a * (x)[1] + (r)[1] * _b + _top; \ | |
(r)[0] = (_mid << 32) | (_ll & 0xFFFFFFFF); \ | |
} | |
#endif | |
#endif | |
#define PCG64_CHEAP_MULTIPLIER 15750249268501108917ULL | |
#define PCG64_DEFAULT_INCREMENT_HI 6364136223846793005ULL | |
#define PCG64_DEFAULT_INCREMENT_LO 1442695040888963407ULL | |
typedef struct { | |
pcg64_uint128 state; | |
} pcg64; | |
static inline uint64_t pcg64_next(pcg64* rng) | |
{ | |
pcg64_uint128 state_mul = PCG64_INIT( 0, PCG64_CHEAP_MULTIPLIER ); | |
pcg64_uint128 state_add = PCG64_INIT( PCG64_DEFAULT_INCREMENT_HI, PCG64_DEFAULT_INCREMENT_LO ); | |
pcg64_uint128 state; | |
PCG64_COPY(state, rng->state); | |
uint64_t hi = PCG64_HI(state); | |
uint64_t lo = PCG64_LO(state); | |
PCG64_MUL(state, state_mul); | |
PCG64_ADD(state, state_add); | |
PCG64_COPY(rng->state, state); | |
// DXSM | |
hi ^= hi >> 32; | |
hi *= PCG64_CHEAP_MULTIPLIER; | |
hi ^= hi >> 48; | |
hi *= lo | 1; | |
return hi; | |
} | |
// returns value in [low,high) interval | |
static inline uint64_t pcg64_range(pcg64* rng, uint64_t low, uint64_t high) | |
{ | |
uint64_t bound = high - low; | |
#if 1 | |
// An optimal algorithm for bounded random integers: https://github.com/apple/swift/pull/39143 | |
pcg64_uint128 r1 = PCG64_INIT( 0, pcg64_next(rng) ); | |
pcg64_uint128 r2 = PCG64_INIT( 0, pcg64_next(rng) ); | |
pcg64_uint128 b = PCG64_INIT( 0, bound ); | |
PCG64_MUL(r1, b); | |
PCG64_MUL(r2, b); | |
pcg64_uint128 t1 = PCG64_INIT( 0, PCG64_LO(r1) ); | |
pcg64_uint128 t2 = PCG64_INIT( 0, PCG64_HI(r2) ); | |
PCG64_ADD(t1, t2); | |
return low + PCG64_HI(r1) + PCG64_HI(t1); | |
#else | |
// Fast Random Integer Generation in an Interval: https://arxiv.org/abs/1805.10941 | |
pcg64_uint128 m; | |
PCG64_MUL2(m, pcg64_next(rng), bound); | |
uint64_t l = PCG64_LO(m); | |
if (l < bound) | |
{ | |
uint64_t t = -(int64_t)bound % bound; | |
while (l < t) | |
{ | |
PCG64_MUL2(m, pcg64_next(rng), bound); | |
l = PCG64_LO(m); | |
} | |
} | |
return low + PCG64_HI(m); | |
#endif | |
} | |
// returns float & double in [0,1) interval | |
static inline float pcg64_nextf(pcg64* rng) | |
{ | |
uint64_t x = pcg64_next(rng); | |
return (float)(int32_t)(x >> 40) * 0x1.0p-24f; | |
} | |
static inline double pcg64_nextd(pcg64* rng) | |
{ | |
uint64_t x = pcg64_next(rng); | |
return (double)(int64_t)(x >> 11) * 0x1.0p-53; | |
} | |
static inline void pcg64_seed(pcg64* rng, uint64_t seed_high, uint64_t seed_low) | |
{ | |
pcg64_uint128 seed = PCG64_INIT( seed_high, seed_low ); | |
PCG64_ZERO(rng->state); | |
pcg64_next(rng); | |
PCG64_ADD(rng->state, seed); | |
pcg64_next(rng); | |
} | |
static inline void pcg64_advance(pcg64* rng, uint64_t delta_high, uint64_t delta_low) | |
{ | |
pcg64_uint128 cur_mult = PCG64_INIT( 0, PCG64_CHEAP_MULTIPLIER ); | |
pcg64_uint128 cur_plus = PCG64_INIT( PCG64_DEFAULT_INCREMENT_HI, PCG64_DEFAULT_INCREMENT_LO ); | |
pcg64_uint128 acc_mult = PCG64_INIT( 0ULL, 1ULL ); | |
pcg64_uint128 acc_plus = PCG64_INIT( 0ULL, 0ULL ); | |
pcg64_uint128 delta = PCG64_INIT( delta_high, delta_low ); | |
while (!PCG64_IS_ZERO(delta)) | |
{ | |
if (PCG64_LO(delta) & 1) | |
{ | |
PCG64_MUL(acc_mult, cur_mult); | |
PCG64_MUL(acc_plus, cur_mult); | |
PCG64_ADD(acc_plus, cur_plus); | |
} | |
pcg64_uint128 temp; | |
PCG64_COPY(temp, cur_plus); | |
PCG64_MUL(cur_plus, cur_mult); | |
PCG64_ADD(cur_plus, temp); | |
PCG64_MUL(cur_mult, cur_mult); | |
PCG64_SHR(delta, 1); | |
} | |
PCG64_MUL(rng->state, acc_mult); | |
PCG64_ADD(rng->state, acc_plus); | |
} |
See line 112
I see, XSL-RR. Thank you.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Is this the DXSM variant of pcg64? It's in the pcg-cpp repo, but also, Tony Finch has a nice page about it: https://dotat.at/@/2023-06-21-pcg64-dxsm.html. Forgive me for being lazy and not inspecting myself.