|
//! core.rs -- standalone Welch two-sample t-test + 3-way significance verdict |
|
//! + a one-factor-at-a-time multiverse sweep over a toy two-arm reconstruction |
|
//! experiment. |
|
//! |
|
//! This file is SELF-CONTAINED: no external crates, no canon dependency. It is a |
|
//! runnable worked example for the multiverse / specification-curve methods line |
|
//! (Steegen et al. 2016; Simonsohn et al. specification curve; Gelman and Loken |
|
//! garden-of-forking-paths). It is a data point, NOT a claim about any framework. |
|
//! |
|
//! Build + run: rustc -O core.rs -o core && ./core |
|
//! |
|
//! HONESTY: the per-seed loss here is a deterministic PROXY, not BPB and not an |
|
//! optimality measure. A Tie is read jointly with n and effect size (small-N |
|
//! Welch power caveat). No arm is declared "best" beyond this proxy. |
|
|
|
/// A 3-way verdict at a chosen alpha. Names are generic on purpose (AWins/BWins): |
|
/// this is a methods example, not a claim about a specific pair of methods. |
|
#[derive(Clone, Copy, PartialEq, Eq, Debug)] |
|
enum Verdict { |
|
AWins, |
|
Tie, |
|
BWins, |
|
} |
|
|
|
fn verdict_label(v: Verdict) -> &'static str { |
|
match v { |
|
Verdict::AWins => "AWins", |
|
Verdict::Tie => "Tie", |
|
Verdict::BWins => "BWins", |
|
} |
|
} |
|
|
|
fn mean(xs: &[f64]) -> f64 { |
|
xs.iter().sum::<f64>() / xs.len() as f64 |
|
} |
|
|
|
/// Sample variance with Bessel's correction (n-1). |
|
fn var(xs: &[f64]) -> f64 { |
|
let m = mean(xs); |
|
let n = xs.len() as f64; |
|
if n < 2.0 { |
|
return 0.0; |
|
} |
|
xs.iter().map(|x| (x - m) * (x - m)).sum::<f64>() / (n - 1.0) |
|
} |
|
|
|
/// Welch two-sample t-test. Returns (mean_diff = mean(a) - mean(b), t, df, p_two_sided). |
|
fn welch(a: &[f64], b: &[f64]) -> (f64, f64, f64, f64) { |
|
let (na, nb) = (a.len() as f64, b.len() as f64); |
|
let (ma, mb) = (mean(a), mean(b)); |
|
let (va, vb) = (var(a), var(b)); |
|
let mean_diff = ma - mb; |
|
let (sa, sb) = (va / na, vb / nb); |
|
let denom = (sa + sb).sqrt(); |
|
if denom == 0.0 { |
|
// zero pooled variance: infinite separation if means differ, else a tie |
|
if mean_diff == 0.0 { |
|
return (0.0, 0.0, na + nb - 2.0, 1.0); |
|
} |
|
return (mean_diff, f64::INFINITY, na + nb - 2.0, 0.0); |
|
} |
|
let t = mean_diff / denom; |
|
let df_num = (sa + sb) * (sa + sb); |
|
let df_den = (sa * sa) / (na - 1.0) + (sb * sb) / (nb - 1.0); |
|
let df = if df_den > 0.0 { df_num / df_den } else { na + nb - 2.0 }; |
|
let p = two_sided_p_from_t(t.abs(), df); |
|
(mean_diff, t, df, p) |
|
} |
|
|
|
/// Two-sided p-value for |t| under Student-t with `df` df, via the regularized |
|
/// incomplete beta function: p = I_{df/(df+t^2)}(df/2, 1/2). |
|
fn two_sided_p_from_t(t_abs: f64, df: f64) -> f64 { |
|
if !t_abs.is_finite() { |
|
return 0.0; |
|
} |
|
let x = df / (df + t_abs * t_abs); |
|
betai(df / 2.0, 0.5, x).clamp(0.0, 1.0) |
|
} |
|
|
|
fn betai(a: f64, b: f64, x: f64) -> f64 { |
|
if x <= 0.0 { |
|
return 0.0; |
|
} |
|
if x >= 1.0 { |
|
return 1.0; |
|
} |
|
let ln_beta = ln_gamma(a + b) - ln_gamma(a) - ln_gamma(b); |
|
let front = (a * x.ln() + b * (1.0 - x).ln() + ln_beta).exp(); |
|
if x < (a + 1.0) / (a + b + 2.0) { |
|
front * betacf(a, b, x) / a |
|
} else { |
|
1.0 - front * betacf(b, a, 1.0 - x) / b |
|
} |
|
} |
|
|
|
/// Lentz continued fraction for the incomplete beta function. |
|
fn betacf(a: f64, b: f64, x: f64) -> f64 { |
|
let tiny = 1e-30_f64; |
|
let qab = a + b; |
|
let qap = a + 1.0; |
|
let qam = a - 1.0; |
|
let mut c = 1.0; |
|
let mut d = 1.0 - qab * x / qap; |
|
if d.abs() < tiny { |
|
d = tiny; |
|
} |
|
d = 1.0 / d; |
|
let mut h = d; |
|
for m in 1..200 { |
|
let m_f = m as f64; |
|
let m2 = 2.0 * m_f; |
|
let aa = m_f * (b - m_f) * x / ((qam + m2) * (a + m2)); |
|
d = 1.0 + aa * d; |
|
if d.abs() < tiny { |
|
d = tiny; |
|
} |
|
c = 1.0 + aa / c; |
|
if c.abs() < tiny { |
|
c = tiny; |
|
} |
|
d = 1.0 / d; |
|
h *= d * c; |
|
let aa2 = -(a + m_f) * (qab + m_f) * x / ((a + m2) * (qap + m2)); |
|
d = 1.0 + aa2 * d; |
|
if d.abs() < tiny { |
|
d = tiny; |
|
} |
|
c = 1.0 + aa2 / c; |
|
if c.abs() < tiny { |
|
c = tiny; |
|
} |
|
d = 1.0 / d; |
|
let del = d * c; |
|
h *= del; |
|
if (del - 1.0).abs() < 3e-12 { |
|
break; |
|
} |
|
} |
|
h |
|
} |
|
|
|
/// Lanczos approximation to ln(Gamma(z)). |
|
fn ln_gamma(z: f64) -> f64 { |
|
let g = 7.0; |
|
let c = [ |
|
0.99999999999980993, |
|
676.5203681218851, |
|
-1259.1392167224028, |
|
771.32342877765313, |
|
-176.61502916214059, |
|
12.507343278686905, |
|
-0.13857109526572012, |
|
9.9843695780195716e-6, |
|
1.5056327351493116e-7, |
|
]; |
|
if z < 0.5 { |
|
std::f64::consts::PI / ((std::f64::consts::PI * z).sin() * ln_gamma(1.0 - z).exp()) |
|
} else { |
|
let z = z - 1.0; |
|
let mut x = c[0]; |
|
for (i, &ci) in c.iter().enumerate().skip(1) { |
|
x += ci / (z + i as f64); |
|
} |
|
let t = z + g + 0.5; |
|
0.5 * (2.0 * std::f64::consts::PI).ln() + (z + 0.5) * t.ln() - t + x.ln() |
|
} |
|
} |
|
|
|
fn verdict(mean_diff: f64, p: f64, alpha: f64) -> Verdict { |
|
if p < alpha { |
|
// strict `<`: at p == alpha we declare Tie (no claim at the boundary) |
|
if mean_diff < 0.0 { |
|
Verdict::AWins // arm A has the lower (better) loss |
|
} else { |
|
Verdict::BWins |
|
} |
|
} else { |
|
Verdict::Tie |
|
} |
|
} |
|
|
|
/// Deterministic per-seed loss for a toy two-arm reconstruction experiment. |
|
/// Arm B is constructed to reconstruct slightly better on this proxy; the |
|
/// per-seed jitter is a fixed function of the seed (no RNG -> reproducible). |
|
fn arm_losses(seed_start: u64, n_seeds: usize, base: f64, gap: f64) -> (Vec<f64>, Vec<f64>) { |
|
let mut a = Vec::with_capacity(n_seeds); |
|
let mut b = Vec::with_capacity(n_seeds); |
|
for k in 0..n_seeds as u64 { |
|
let s = seed_start + k; |
|
// deterministic jitter in [-0.05, 0.05) from a simple hash of the seed |
|
let j = (((s.wrapping_mul(2654435761)) % 1000) as f64 / 1000.0 - 0.5) * 0.1; |
|
a.push(base + j); |
|
b.push(base - gap + j * 0.5); |
|
} |
|
(a, b) |
|
} |
|
|
|
#[derive(Clone, Copy)] |
|
struct Cell { |
|
n_seeds: usize, |
|
seed_start: u64, |
|
alpha: f64, |
|
} |
|
|
|
fn build_grid() -> Vec<(&'static str, Cell)> { |
|
let anchor = Cell { n_seeds: 8, seed_start: 43, alpha: 0.05 }; |
|
let mut cells = vec![("anchor", anchor)]; |
|
for &n in &[6usize, 12, 16] { |
|
cells.push(("vary_n", Cell { n_seeds: n, ..anchor })); |
|
} |
|
for &s in &[1u64, 100] { |
|
cells.push(("vary_seed", Cell { seed_start: s, ..anchor })); |
|
} |
|
cells |
|
} |
|
|
|
fn main() { |
|
let grid = build_grid(); |
|
println!("multiverse spec-curve (toy two-arm, deterministic proxy loss)"); |
|
println!("{:<12} {:>3} {:>7} {:>8} {:>12} {:>12}", "knob", "n", "sstart", "verdict", "mean_diff", "p"); |
|
let mut counts = [0usize; 3]; // AWins, Tie, BWins |
|
for (knob, c) in &grid { |
|
let (a, b) = arm_losses(c.seed_start, c.n_seeds, 4.72, 0.67); |
|
let (md, _t, _df, p) = welch(&a, &b); |
|
let v = verdict(md, p, c.alpha); |
|
match v { |
|
Verdict::AWins => counts[0] += 1, |
|
Verdict::Tie => counts[1] += 1, |
|
Verdict::BWins => counts[2] += 1, |
|
} |
|
println!("{:<12} {:>3} {:>7} {:>8} {:>12.4} {:>12.2e}", knob, c.n_seeds, c.seed_start, verdict_label(v), md, p); |
|
} |
|
let distinct = counts.iter().filter(|&&c| c > 0).count(); |
|
println!("verdict_counts: AWins={} Tie={} BWins={} (distinct={})", counts[0], counts[1], counts[2], distinct); |
|
println!("decision: {}", if distinct == 1 { "VERDICT_INVARIANT" } else { "VERDICT_FLIPS" }); |
|
} |