Skip to content

Instantly share code, notes, and snippets.

@ziap
Last active October 30, 2025 09:20
Show Gist options
  • Save ziap/70e49eafafb4be29b9644502fdbc603f to your computer and use it in GitHub Desktop.
Save ziap/70e49eafafb4be29b9644502fdbc603f to your computer and use it in GitHub Desktop.
PCG32 XSH-RR with vectorized 64-bit generation
// Vectorized PCG-32 XSH-RR with constant increment
// Original PCG-32 from: <https://www.pcg-random.org/download.html>
const Pcg32 = struct {
    /// Current 64-bit LCG state. Every generator call advances it.
    state: u64,

    /// LCG multiplier. Typed as `u64` so that comptime products such as
    /// `MUL *% MUL` wrap modulo 2^64 instead of growing as `comptime_int`.
    const MUL: u64 = 0x5851f42d4c957f2d;
    /// LCG increment (the same constant for every generator instance).
    const INC: u64 = 0x14057b7ef767814f;

    /// Applies the XSH-RR output permutation to every lane of `s`.
    ///
    /// For each 64-bit state the result is the low 32 bits of
    /// `(s ^ (s >> 18)) >> 27`, rotated right by the top five bits of `s`.
    inline fn output(comptime lanes: comptime_int, s: @Vector(lanes, u64)) @Vector(lanes, u32) {
        // Work on 32-bit lanes so the rotation amount (u5) is exactly the
        // shift type of the operand.
        const xorshifted: @Vector(lanes, u32) = @truncate((s ^ (s >> @splat(18))) >> @splat(27));
        const rot: @Vector(lanes, u5) = @intCast(s >> @splat(59));
        // `-%rot` is `(32 - rot) mod 32`, so a rotation of zero stays a no-op.
        return (xorshifted >> rot) | (xorshifted << (-%rot));
    }

    /// Returns 64 random bits built from two consecutive 32-bit outputs
    /// (the first output lands in the high half) and advances the state by
    /// two LCG steps.
    inline fn next(self: *Pcg32) u64 {
        // Constants of the combined two-step jump:
        //   (s * MUL + INC) * MUL + INC == s * MUL^2 + (INC * MUL + INC)
        const mul2 = comptime MUL *% MUL;
        const inc2 = comptime INC *% MUL +% INC;

        const s0 = self.state;
        const s1 = s0 *% MUL +% INC;
        self.state = s0 *% mul2 +% inc2;

        const states: @Vector(2, u64) = .{ s0, s1 };
        const out = output(2, states);
        return (@as(u64, out[0]) << 32) | out[1];
    }

    /// Fills `buffer` with random bytes, producing `lanes` 32-bit outputs per
    /// vector step.
    ///
    /// Each 32-bit output is stored least-significant byte first, so the byte
    /// stream does not depend on the target's endianness or on `lanes`. The
    /// state advances by one LCG step per started 32-bit word, i.e. by
    /// `ceil(buffer.len / 4)` steps; unused bytes of a final partial word are
    /// discarded.
    fn fill(self: *Pcg32, comptime lanes: comptime_int, buffer: []u8) void {
        comptime {
            // With zero lanes the chunk size would be zero and the main loop
            // below could never make progress.
            if (lanes < 1) @compileError("Pcg32.fill requires at least one lane");
        }

        const V64 = @Vector(lanes, u64);
        const chunk_size = lanes * @sizeOf(u32);
        const little_endian = comptime (@import("builtin").cpu.arch.endian() == .little);

        // lane_mul[i] / lane_inc[i] jump a state forward by `i` steps;
        // stride_mul / stride_inc jump it forward by `lanes` steps.
        const lane_mul, const lane_inc, const stride_mul, const stride_inc = comptime blk: {
            var muls: [lanes]u64 = undefined;
            var incs: [lanes]u64 = undefined;
            var mul_acc: u64 = 1;
            var inc_acc: u64 = 0;
            for (&muls, &incs) |*mul_slot, *inc_slot| {
                mul_slot.* = mul_acc;
                inc_slot.* = inc_acc;
                mul_acc *%= MUL;
                inc_acc = inc_acc *% MUL +% INC;
            }
            break :blk .{ @as(V64, muls), @as(V64, incs), mul_acc, inc_acc };
        };
        const vm: V64 = @splat(stride_mul);
        const vc: V64 = @splat(stride_inc);

        // Lane i holds the state after i steps; lane 0 is the current state.
        var s = @as(V64, @splat(self.state)) *% lane_mul +% lane_inc;

        // Full chunks. `idx <= buffer.len` always holds, so the subtraction
        // cannot underflow and no addition can overflow.
        var idx: usize = 0;
        while (buffer.len - idx >= chunk_size) : (idx += chunk_size) {
            const out = output(lanes, s);
            s = s *% vm +% vc;
            const chunk = if (little_endian) out else @byteSwap(out);
            const chunk_ptr: *const [chunk_size]u8 = @ptrCast(&chunk);
            @memcpy(buffer[idx..][0..chunk_size], chunk_ptr);
        }

        const remaining = buffer.len - idx;
        if (remaining == 0) {
            self.state = s[0];
            return;
        }

        // Tail: generate one more vector of outputs but copy only the bytes
        // that are still needed.
        const out = output(lanes, s);
        const chunk = if (little_endian) out else @byteSwap(out);
        const chunk_ptr: *const [chunk_size]u8 = @ptrCast(&chunk);
        @memcpy(buffer[idx..], chunk_ptr[0..remaining]);

        // Resume one step past the last lane whose output was (partially)
        // consumed. The lanes are read through an array because the index is
        // only known at run time.
        const lane_states: [lanes]u64 = s;
        const last_used = (remaining - 1) / @sizeOf(u32);
        self.state = lane_states[last_used] *% MUL +% INC;
    }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment