Skip to content

Instantly share code, notes, and snippets.

View Rexicon226's full-sized avatar
💭
optimizing

David Rubin Rexicon226

💭
optimizing
View GitHub Profile
@Rexicon226
Rexicon226 / reedsol_recover.zig
Last active April 16, 2026 03:41
AVX{2,512}+{,GFNI} reedsol recovery for k=64
const std = @import("std");
const builtin = @import("builtin");
// Vector width in bytes: 64 when the target CPU reports the AVX-512F feature,
// otherwise 32.
const L = if (builtin.cpu.has(.x86, .avx512f)) 64 else 32;
// Byte vector with `L` lanes.
const V = @Vector(L, u8);
// Function taking two `V` operands and returning a `V`, loaded from an external
// symbol named after the LLVM x86 `pshuf.b` intrinsic that matches `L`
// (the AVX-512 512-bit form for 64, the AVX2 form for 32).
const pshufb = @extern(*const fn (V, V) callconv(.c) V, .{ .name = switch (L) {
64 => "llvm.x86.avx512.pshuf.b.512",
32 => "llvm.x86.avx2.pshuf.b",
// `L` is defined above as either 64 or 32, so no other value reaches here.
else => unreachable,
} }).*;
@Rexicon226
Rexicon226 / reedsol_encode.zig
Last active April 12, 2026 18:30
AVX512+GFNI reedsol encoding, for k=64
//! Efficient computation of 32 parity shreds for 32 data shreds.
//!
//! Based on the O(n log n) algorithm described in:
//! S. -J. Lin, T. Y. Al-Naffouri, Y. S. Han and W. -H. Chung, "Novel
//! Polynomial Basis With Fast Fourier Transform and Its Application to
//! Reed–Solomon Erasure Codes," in IEEE Transactions on Information Theory,
//! vol. 62, no. 11, pp. 6284-6299, Nov. 2016, doi: 10.1109/TIT.2016.2608892.
//!
//! Given 32 data shreds, we want to produce 32 parity shreds such that any
//! 32 of the 64 total shreds can reconstruct the original data. The standard
@Rexicon226
Rexicon226 / kt.c
Last active February 4, 2026 19:49
Dead simple KangarooTwelve-128 implementation
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
// Chunk size for tree hashing
#define CHUNK_SIZE 8192
// Attribute macro that requests 128-byte alignment for the declaration it is attached to.
#define ALIGN __attribute__ ((aligned (128)))
// NOTE(review): presumably selects the 128-bit variant of the hash — confirm against the uses of MODE.
#define MODE 128
@Rexicon226
Rexicon226 / keccak.zig
Last active February 2, 2026 14:55
AVX-512 Keccak (mainly for parallel, not serial)
const V = @Vector(8, u64);
fn keccak(bytes: []const u8, comptime r: u32, comptime delim: u8, out: *[r]u8) void {
var state: [144]u8 align(32) = @splat(0);
var x: [5]V = @splat(@splat(0));
var input = bytes;
const rsize = 200 - 2 * r;
comptime std.debug.assert(rsize == 72); // TODO
@Rexicon226
Rexicon226 / r43x6.zig
Created January 19, 2026 00:48
6 x 43-bit curve25519 implementation
const std = @import("std");
// 8 lanes of unsigned 64-bit integers.
const V = @Vector(8, u64);
// 8 lanes of signed 64-bit integers.
const S = @Vector(8, i64);
// 4 lanes of unsigned 64-bit integers.
const u64x4 = @Vector(4, u64);
// Project-local module, imported by relative path.
const avx512 = @import("src/curves/ed25519/avx512.zig");
// External functions declared under the names of LLVM's 512-bit `vpmadd52l.uq`
// and `vpmadd52h.uq` x86 intrinsics; each takes three `V` operands and returns a `V`.
// NOTE(review): presumably the low/high halves of the AVX-512 IFMA 52-bit
// multiply-add — confirm operand order against the intrinsic documentation.
extern fn @"llvm.x86.avx512.vpmadd52l.uq.512"(V, V, V) V;
extern fn @"llvm.x86.avx512.vpmadd52h.uq.512"(V, V, V) V;
@Rexicon226
Rexicon226 / pht.zig
Created January 4, 2026 10:29
Compile-time perfect hash table. Based off of: https://cmph.sourceforge.net/papers/esa09.pdf. Meant for low key amounts (<= ~32).
const std = @import("std");
/// A perfect hash table for N keys of the same `length`. Useful for public key maps.
fn pht(K: type, V: type, entries: []const struct { K, V }) type {
const LAMBDA = 5;
const table_len = entries.len;
const bucket_len = (table_len + LAMBDA - 1) / LAMBDA;
const window = 4;
const length = @typeInfo(std.meta.Child(K)).array.len;
@Rexicon226
Rexicon226 / benchmark.zig
Created November 28, 2025 04:54
RVV indexOfSentinel benchmark
const std = @import("std");
const iterations_per_byte = 1000;
const warmup_iterations = 10;
pub fn main() !void {
const allocator = std.heap.smp_allocator;
// Pin the process to a single core (1)
const cpu0001: std.os.linux.cpu_set_t = [1]usize{0b0001} ++ ([_]usize{0} ** (16 - 1));
const std = @import("std");
const el: u256 = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed;
const L = 1 * el;
fn heea(v: u256) struct { u128, u128 } {
std.debug.assert(len(L) == 253);
std.debug.assert(v < L);
std.debug.assert(v > 0);
From e9b300e1579d456fd307659f86d8ac0373a25fea Mon Sep 17 00:00:00 2001
From: David Rubin <david@vortan.dev>
Date: Wed, 5 Nov 2025 18:26:33 -0800
Subject: [PATCH] explicit `PROT_WRITE` in `zero_bss` is redundant
The `PAGE_WRITE` check already protects the `memset`.
The Zig compiler uses `memsz` to reserve virtual address
space that can be used by future updates, including
in non-writable sections such as `.text`.
const std = @import("std");
const Edwards25519 = std.crypto.ecc.Edwards25519;
/// Reference check for a 32-byte scalar encoding.
///
/// Passes a copy of `s` to `Edwards25519.scalar.rejectNonCanonical` and
/// returns `true` when that call succeeds, `false` when it returns an error.
export fn slow(s: *const [32]u8) bool {
    const encoded = s.*;
    // The error value carries no extra information here; only success or
    // failure is reported to the caller.
    return if (Edwards25519.scalar.rejectNonCanonical(encoded)) |_| true else |_| false;
}
export fn fast(s: *const [32]u8) bool {
// If none of the top 4 bits are set, then the scalar fits into S \in [0, 2^252),