Created
April 14, 2026 10:20
-
-
Save dcondrey/00f7df9c7fbe299f35de3d8fd2bea9dc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| set -e | |
| lscpu 2>/dev/null | grep -E 'Model name|L3' || true | |
| echo Arch: $(uname -m) | |
| echo Cores: $(nproc) | |
| free -h | grep Mem || true | |
| nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo GPU: none | |
| apt-get update -qq >/dev/null 2>&1 | |
| apt-get install -y -qq gcc >/dev/null 2>&1 | |
| curl -sSf https://sh.rustup.rs | sh -s -- -y --quiet >/dev/null 2>&1 | |
| . /root/.cargo/env | |
| cat > /tmp/posme-bench.rs << 'RSEOF' | |
| // PoSME Reference Benchmark | |
| // ========================= | |
| // Proof of Sequential Memory Execution -- single-file benchmark. | |
| // | |
| // Validates the latency-bound ASIC resistance claim by measuring | |
| // the per-step cost breakdown (hash vs memory) across arena sizes. | |
| // | |
| // BUILD: rustc -O posme-bench.rs -o posme-bench | |
| // RUN: ./posme-bench | |
| // | |
| // No external dependencies. Uses a built-in minimal hash (SipHash | |
| // via std) for portability. Production implementations should use | |
| // BLAKE3; the latency-bound property is independent of hash choice | |
| // as long as hash time << memory read time. | |
| use std::collections::hash_map::DefaultHasher; | |
| use std::hash::{Hash, Hasher}; | |
| use std::time::Instant; | |
| const LAMBDA: usize = 32; | |
| #[derive(Clone, Copy)] | |
| struct Block { | |
| data: [u8; LAMBDA], | |
| causal: [u8; LAMBDA], | |
| } | |
| impl Block { | |
| fn zeroed() -> Self { | |
| Self { | |
| data: [0u8; LAMBDA], | |
| causal: [0u8; LAMBDA], | |
| } | |
| } | |
| } | |
| fn fast_hash(inputs: &[&[u8]]) -> [u8; LAMBDA] { | |
| let mut h = DefaultHasher::new(); | |
| for inp in inputs { | |
| inp.hash(&mut h); | |
| } | |
| let v = h.finish(); | |
| let mut out = [0u8; LAMBDA]; | |
| let bytes = v.to_le_bytes(); | |
| // Fill 32 bytes by hashing twice with different seeds | |
| out[..8].copy_from_slice(&bytes); | |
| let mut h2 = DefaultHasher::new(); | |
| v.hash(&mut h2); | |
| let v2 = h2.finish(); | |
| out[8..16].copy_from_slice(&v2.to_le_bytes()); | |
| let mut h3 = DefaultHasher::new(); | |
| v2.hash(&mut h3); | |
| let v3 = h3.finish(); | |
| out[16..24].copy_from_slice(&v3.to_le_bytes()); | |
| let mut h4 = DefaultHasher::new(); | |
| v3.hash(&mut h4); | |
| out[24..32].copy_from_slice(&h4.finish().to_le_bytes()); | |
| out | |
| } | |
| fn i2osp(x: u32) -> [u8; 4] { | |
| x.to_be_bytes() | |
| } | |
| fn addr_from(cursor: &[u8; LAMBDA], j: u32, n: usize) -> usize { | |
| let h = fast_hash(&[b"addr", cursor, &j.to_be_bytes()]); | |
| u32::from_be_bytes([h[0], h[1], h[2], h[3]]) as usize % n | |
| } | |
| fn initialize(blocks: &mut [Block], seed: &[u8]) { | |
| let n = blocks.len(); | |
| blocks[0].data = fast_hash(&[b"init", seed, &i2osp(0)]); | |
| blocks[0].causal = fast_hash(&[b"causal", seed, &i2osp(0)]); | |
| for i in 1..n { | |
| let prev = blocks[i - 1].data; | |
| let skip = blocks[i / 2].data; | |
| blocks[i].data = fast_hash(&[b"init", seed, &i2osp(i as u32), &prev, &skip]); | |
| blocks[i].causal = fast_hash(&[b"causal", seed, &i2osp(i as u32)]); | |
| } | |
| } | |
| #[inline(never)] | |
| fn posme_step(blocks: &mut [Block], t_prev: &[u8; LAMBDA], t: u32, d: usize) -> [u8; LAMBDA] { | |
| let n = blocks.len(); | |
| let mut cursor = *t_prev; | |
| // THE CRITICAL LOOP: pointer-chase with data-dependent addressing. | |
| // cursor = H(cursor || block.data || block.causal) | |
| // Each iteration MUST wait for the memory read before computing | |
| // the next address. This is where latency-hardness lives. | |
| for j in 0..d { | |
| let a = addr_from(&cursor, j as u32, n); | |
| let val = blocks[a]; | |
| cursor = fast_hash(&[&cursor, &val.data, &val.causal]); | |
| } | |
| // Symbiotic write | |
| let w = addr_from(&cursor, d as u32, n); | |
| let old = blocks[w]; | |
| blocks[w].data = fast_hash(&[&old.data, &cursor, &old.causal]); | |
| blocks[w].causal = fast_hash(&[&old.causal, &cursor, &i2osp(t)]); | |
| // Transcript chain | |
| fast_hash(&[t_prev, &i2osp(t), &cursor]) | |
| } | |
| fn hex8(b: &[u8; LAMBDA]) -> String { | |
| b[..8].iter().map(|x| format!("{:02x}", x)).collect() | |
| } | |
| fn format_size(bytes: usize) -> String { | |
| if bytes >= 1 << 30 { | |
| format!("{:.1} GiB", bytes as f64 / (1u64 << 30) as f64) | |
| } else if bytes >= 1 << 20 { | |
| format!("{:.0} MiB", bytes as f64 / (1u64 << 20) as f64) | |
| } else { | |
| format!("{:.0} KiB", bytes as f64 / (1u64 << 10) as f64) | |
| } | |
| } | |
| fn run_benchmark(n: usize, k: u32, d: usize, label: &str) { | |
| let arena_bytes = n * 2 * LAMBDA; | |
| let rho = k as f64 / n as f64; | |
| println!(" {:.<44} {} arena, {} steps, rho={:.1}", | |
| format!("{} ", label), format_size(arena_bytes), k, rho); | |
| let mut blocks = vec![Block::zeroed(); n]; | |
| initialize(&mut blocks, b"posme-bench-v1"); | |
| let mut transcript = fast_hash(&[b"transcript", b"posme-bench-v1"]); | |
| let start = Instant::now(); | |
| for t in 1..=k { | |
| transcript = posme_step(&mut blocks, &transcript, t, d); | |
| } | |
| let elapsed = start.elapsed(); | |
| let total_s = elapsed.as_secs_f64(); | |
| let avg_ns = total_s / k as f64 * 1e9; | |
| let hash_est_ns = (d as f64 + 3.0) * 5.0; // ~5ns per hash (SipHash) | |
| let _mem_ns = avg_ns - hash_est_ns; | |
| let hash_pct = hash_est_ns / avg_ns * 100.0; | |
| println!(" Total time: {:.3}s", total_s); | |
| println!(" Avg step: {:.0} ns", avg_ns); | |
| println!(" Hash fraction: {:.1}%", hash_pct); | |
| println!(" T_K: {}...", hex8(&transcript)); | |
| println!(" TMTO (alpha=0): {:.1}x", 1.0 + (2.0 * rho + 1.0)); | |
| println!(); | |
| } | |
| fn main() { | |
| let d: usize = 8; | |
| println!(); | |
| println!(" ╔══════════════════════════════════════════════════════════╗"); | |
| println!(" ║ PoSME Reference Benchmark v1.0 ║"); | |
| println!(" ║ Proof of Sequential Memory Execution ║"); | |
| println!(" ║ draft-condrey-cfrg-posme ║"); | |
| println!(" ╠══════════════════════════════════════════════════════════╣"); | |
| println!(" ║ Reads/step (d): 8 ║"); | |
| println!(" ║ Hash: SipHash (std, portable) ║"); | |
| println!(" ║ Block size: 64 bytes (32 data + 32 causal) ║"); | |
| println!(" ╚══════════════════════════════════════════════════════════╝"); | |
| println!(); | |
| println!(" ── Phase 1: Cache Scaling ──────────────────────────────────"); | |
| println!(" Measures per-step cost across arena sizes to demonstrate"); | |
| println!(" the transition from cache-bound to memory-bound behavior."); | |
| println!(); | |
| // Small arenas (fit in cache) -- short runs | |
| run_benchmark(1 << 14, 1 << 14, d, "L2-resident (1 MiB)"); | |
| run_benchmark(1 << 16, 1 << 16, d, "L3-resident (4 MiB)"); | |
| run_benchmark(1 << 18, 1 << 18, d, "L3-edge (16 MiB)"); | |
| run_benchmark(1 << 20, 1 << 20, d, "DRAM-spill (64 MiB)"); | |
| println!(" ── Phase 2: Production Parameters ─────────────────────────"); | |
| println!(" 1 GiB arena, rho=1 (K=N). This is the minimum recommended"); | |
| println!(" configuration for TMTO resistance."); | |
| println!(); | |
| let n_prod = 1 << 24; // 16M blocks = 1 GiB | |
| let k_prod = n_prod as u32; // rho = 1 | |
| run_benchmark(n_prod, k_prod, d, "Production (1 GiB, rho=1)"); | |
| println!(" ── Interpretation Guide ──────────────────────────────────"); | |
| println!(); | |
| println!(" MACHINE-DEPENDENT (will vary by hardware):"); | |
| println!(" - Absolute step time (ns)"); | |
| println!(" - Total execution time (s)"); | |
| println!(); | |
| println!(" MACHINE-INDEPENDENT (should hold on any hardware):"); | |
| println!(" - Hash fraction < 10% at 1 GiB arena"); | |
| println!(" - Step time INCREASES with arena size"); | |
| println!(" - TMTO penalty matches formula: 1 + (1-a)(2p+1)"); | |
| println!(); | |
| println!(" WHY THIS MATTERS:"); | |
| println!(" If hash fraction is small, an ASIC that computes"); | |
| println!(" hashes 100x faster gains almost nothing. The"); | |
| println!(" bottleneck is memory latency, which is bounded by"); | |
| println!(" physics (~2x DDR5 vs HBM3, ~35ns vs ~20ns)."); | |
| println!(); | |
| println!(" For full details: draft-condrey-cfrg-posme"); | |
| println!(" https://lf-decentralized-trust-labs.github.io/proof-of-effort/"); | |
| println!(); | |
| } | |
| RSEOF | |
| rustc -O /tmp/posme-bench.rs -o /tmp/posme-bench | |
| /tmp/posme-bench | |
| echo POSME_DONE | |
| sleep 60 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment