|
// floor_oracle_selftest.rs -- FAIL-reachable self-test for the oracle.
// ASCII-only. Each gate is demonstrated reachable as a FAILURE on a broken input.
// Exit 0 = all PASS; exit 1 = a gate failed.
|
|
|
include!("../core.rs"); |
|
|
|
use std::process::exit; |
|
|
|
/// Records the outcome of one named gate and prints a single result line.
///
/// When `cond` holds, `*pass` is bumped and `PASS <name>` is printed;
/// otherwise `*fail` is bumped and `FAIL <name>` is printed.
fn check(name: &str, cond: bool, pass: &mut u32, fail: &mut u32) {
    // Handle the failing gate first so the passing path reads straight through.
    if !cond {
        *fail += 1;
        println!("FAIL {}", name);
        return;
    }

    *pass += 1;
    println!("PASS {}", name);
}
|
|
|
/// A deliberately WRONG parameter law: `999 * d + l * d`.
///
/// It exists only so the self-test can demonstrate a failure path: gate T1d
/// asserts that this law does NOT reproduce the anchor parameter count.
fn params_wrong(d: u64, l: u64) -> u64 {
    let bogus_base = 999 * d;
    let bogus_cross = l * d;
    bogus_base + bogus_cross // not the real law
}
|
|
|
fn main() { |
|
let mut pass = 0u32; |
|
let mut fail = 0u32; |
|
|
|
// T1: param law reproduces 493056 two independent ways. |
|
let pc = params_closed(128, 2); |
|
let pt = params_tensor_sum(128, 2); |
|
check("T1a closed law == 493056", pc == ANCHOR_PARAMS, &mut pass, &mut fail); |
|
check("T1b tensor-sum == 493056", pt == ANCHOR_PARAMS, &mut pass, &mut fail); |
|
check("T1c two derivations agree", pc == pt, &mut pass, &mut fail); |
|
// FAIL-reachability: the wrong law must NOT reproduce the anchor. |
|
check("T1d wrong law is rejected (FAIL-reachable)", params_wrong(128, 2) != ANCHOR_PARAMS, &mut pass, &mut fail); |
|
|
|
// T2: a lying floor (floor params < our params) MUST flip the verdict to at/above. |
|
let our = ANCHOR_PARAMS; |
|
let honest_floor = 150_000_000u64; // real TinyCodeLM-150M |
|
let lying_floor = 1_000u64; // absurd: smaller than our model |
|
check("T2a honest floor -> we are below", below_floor_params(honest_floor, our), &mut pass, &mut fail); |
|
check("T2b lying floor -> NOT below (verdict flips, FAIL-reachable)", !below_floor_params(lying_floor, our), &mut pass, &mut fail); |
|
|
|
// T3: Wilson 0-success ceiling strictly decreasing in n. |
|
let w4 = wilson_zero_success_upper(4); |
|
let w16 = wilson_zero_success_upper(16); |
|
let w32 = wilson_zero_success_upper(32); |
|
check("T3a w4 > w16", w4 > w16, &mut pass, &mut fail); |
|
check("T3b w16 > w32", w16 > w32, &mut pass, &mut fail); |
|
// Sanity vs pre-registered values (~0.49 / ~0.19 / ~0.11). |
|
check("T3c w4 ~0.49", (w4 - 0.490).abs() < 0.02, &mut pass, &mut fail); |
|
check("T3d w16 ~0.19", (w16 - 0.194).abs() < 0.02, &mut pass, &mut fail); |
|
check("T3e w32 ~0.11", (w32 - 0.107).abs() < 0.02, &mut pass, &mut fail); |
|
|
|
// T4: pass@1 identity c/n. |
|
check("T4a pass@1(16,4) == 0.25", (pass_at_1(16, 4) - 0.25).abs() < 1e-12, &mut pass, &mut fail); |
|
check("T4b pass@1(32,0) == 0.0", pass_at_1(32, 0) == 0.0, &mut pass, &mut fail); |
|
|
|
// T5: pass@k boundaries + monotone non-decreasing in c. |
|
check("T5a pass@k c=0 -> 0", pass_at_k(16, 0, 1) == 0.0, &mut pass, &mut fail); |
|
check("T5b pass@k c=n -> 1", (pass_at_k(16, 16, 4) - 1.0).abs() < 1e-12, &mut pass, &mut fail); |
|
let mut monotone = true; |
|
let mut prev = -1.0f64; |
|
for c in 0..=16u64 { |
|
let v = pass_at_k(16, c, 4); |
|
if v + 1e-12 < prev { monotone = false; } |
|
prev = v; |
|
} |
|
check("T5c pass@k monotone non-decreasing in c", monotone, &mut pass, &mut fail); |
|
// FAIL-reachability: a decreasing-in-c "rate" would trip this; prove the guard works. |
|
let broken_monotone = { |
|
let seq = [0.0f64, 0.5, 0.4]; // deliberately decreasing |
|
let mut ok = true; let mut p = -1.0; |
|
for &v in &seq { if v + 1e-12 < p { ok = false; } p = v; } |
|
ok |
|
}; |
|
check("T5d broken decreasing seq is caught (FAIL-reachable)", !broken_monotone, &mut pass, &mut fail); |
|
|
|
// T6: no-fabrication gate. With no result.json, the oracle MUST keep capability |
|
// as the literal sentinel and emit NO float rate. We assert the sentinel string |
|
// is what the oracle writes (read its emitted report if present, else the constant). |
|
let sentinel = "TRAIN_BOX_PENDING"; |
|
let report = std::fs::read_to_string("data/floor_oracle_report.json").unwrap_or_default(); |
|
if report.is_empty() { |
|
// oracle not yet run; assert the sentinel is a non-numeric string (cannot be a rate) |
|
check("T6a sentinel is non-numeric", sentinel.parse::<f64>().is_err(), &mut pass, &mut fail); |
|
} else { |
|
let has_sentinel = report.contains("\"capability_compile_at_1\": \"TRAIN_BOX_PENDING\"") |
|
&& report.contains("\"capability_pass_at_1\": \"TRAIN_BOX_PENDING\""); |
|
check("T6a report keeps TRAIN_BOX_PENDING for both capability fields", has_sentinel, &mut pass, &mut fail); |
|
// no float capability rate may appear like "capability_pass_at_1": 0.xx |
|
let fabricated = report.contains("\"capability_pass_at_1\": 0.") |
|
|| report.contains("\"capability_compile_at_1\": 0."); |
|
check("T6b no fabricated capability rate present (FAIL-reachable)", !fabricated, &mut pass, &mut fail); |
|
} |
|
|
|
// T7: determinism -- closed-form law is pure; same inputs -> same outputs. |
|
check("T7 determinism (pure law)", params_closed(128, 2) == params_closed(128, 2) |
|
&& wilson_zero_success_upper(32) == wilson_zero_success_upper(32), &mut pass, &mut fail); |
|
|
|
println!("---"); |
|
println!("PASS={} FAIL={}", pass, fail); |
|
if fail == 0 { println!("ALL SELF-TESTS PASS"); exit(0); } |
|
else { println!("SELF-TEST FAILURES PRESENT"); exit(1); } |
|
} |