Created
March 7, 2026 00:25
-
-
Save kim-em/6b8de5618dbdbb679b6f62a1c44dde4c to your computer and use it in GitHub Desktop.
Mathlib CI timing report script and sample output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Mathlib CI timing report. | |
| Fetches recent CI run data from GitHub Actions and produces a report showing: | |
| - Per-step timing for the Build job (in execution order) | |
| - Grouped timing summary | |
| - All jobs overview | |
| - Linear regression of build time vs cache misses | |
| Usage: | |
| python3 scripts/ci_timing_report.py [--runs N] | |
| Requires: gh CLI authenticated with access to leanprover-community/mathlib4 | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| import statistics | |
| import subprocess | |
| import sys | |
| import tempfile | |
| from collections import defaultdict | |
| from datetime import datetime | |
| REPO = "leanprover-community/mathlib4" | |
def parse_time(t):
    """Parse a GitHub-style ISO-8601 timestamp (with 'Z' suffix) into a datetime.

    Returns None for empty/missing input.
    """
    if not t:
        return None
    # Older Pythons' fromisoformat rejects a literal 'Z'; normalize it to an
    # explicit UTC offset first.
    normalized = t.replace("Z", "+00:00")
    return datetime.fromisoformat(normalized)
def duration_seconds(start, end):
    """Seconds elapsed between two ISO timestamps; 0 if either is missing.

    Negative spans (clock skew / bad data) are clamped to 0.
    """
    if not start or not end:
        return 0
    started, finished = parse_time(start), parse_time(end)
    elapsed = (finished - started).total_seconds()
    return elapsed if elapsed > 0 else 0
def fmt(secs):
    """Format a duration in seconds as 'XmYYs' (e.g. 125 -> '2m05s')."""
    total = int(secs)
    return f"{total // 60}m{total % 60:02d}s"
def gh(*args, jq=None):
    """Run a gh CLI command; return parsed JSON, raw text, or None on failure.

    None is returned when the command fails or produces no output. Output
    that is not valid JSON (e.g. a bare scalar from a --jq filter) is
    returned as the raw string.
    """
    cmd = ["gh", *args]
    if jq:
        cmd += ["--jq", jq]
    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    out = proc.stdout.strip()
    if proc.returncode != 0 or not out:
        return None
    try:
        return json.loads(out)
    except json.JSONDecodeError:
        # Not JSON -- hand the caller the raw text instead.
        return out
def gh_log(job_id):
    """Download the raw log text for a job via the GitHub API; '' on failure."""
    proc = subprocess.run(
        ["gh", "api", f"repos/{REPO}/actions/jobs/{job_id}/logs"],
        capture_output=True, text=True, timeout=120,
    )
    return proc.stdout if proc.returncode == 0 else ""
def list_bors_runs(n):
    """Return up to n recent *successful* bors (staging) workflow runs."""
    runs = gh(
        "run", "list", "--repo", REPO, "--workflow", "bors.yml",
        # Over-fetch so that filtering out failed runs still leaves n.
        "--limit", str(n * 2),
        "--json", "databaseId,status,conclusion,displayTitle",
    )
    successes = [r for r in (runs or []) if r["conclusion"] == "success"]
    return successes[:n]
def list_master_runs(n):
    """Return up to n recent successful push-triggered runs on master."""
    runs = gh(
        "run", "list", "--repo", REPO, "--workflow", "build.yml",
        # Over-fetch so that filtering still leaves n.
        "--limit", str(n * 2),
        "--json", "databaseId,status,conclusion,displayTitle,headBranch,event",
    )

    def wanted(r):
        # Only successful pushes to master count as "full cache hit" runs.
        return (r["conclusion"] == "success"
                and r["headBranch"] == "master"
                and r["event"] == "push")

    return [r for r in (runs or []) if wanted(r)][:n]
def fetch_jobs(run_id):
    """Fetch all jobs for a run with their step timings.

    Returns a mapping
        {job_name: {"total": seconds, "steps": [{"name": str, "duration": secs}]}}
    or {} when the API call fails or returns something unexpected.
    """
    data = gh("api", f"repos/{REPO}/actions/runs/{run_id}/jobs", "--paginate")
    # `gh` may hand back a raw *string* rather than a dict: `gh api --paginate`
    # emits one JSON document per page, and the concatenation is not valid
    # JSON, so gh() falls back to returning the text. Guard so we don't crash
    # on .get() below; a missing run yields an empty mapping instead.
    if not isinstance(data, dict):
        return {}
    jobs = {}
    for job in data.get("jobs", []):
        steps = [
            {"name": step["name"],
             "duration": duration_seconds(step["started_at"], step["completed_at"])}
            for step in job.get("steps", [])
        ]
        jobs[job["name"]] = {
            "total": duration_seconds(job["started_at"], job["completed_at"]),
            "steps": steps,
        }
    return jobs
def get_build_job_id(run_id):
    """Return the numeric ID of the Build job in a run, or None if not found."""
    raw = gh("api", f"repos/{REPO}/actions/runs/{run_id}/jobs",
             jq='.jobs[] | select(.name | test("^ci.*Build$")) | .id')
    # `raw` may be None (API failure), an int, or a string; int() + the
    # except clause covers all three uniformly.
    try:
        return int(raw)
    except (ValueError, TypeError):
        return None
def count_built_replayed(log_text):
    """Count successful 'Built Mathlib.*' / 'Replayed Mathlib.*' log lines.

    Only lines carrying the success check-mark are counted; a line matching
    both patterns counts as built. Returns (built, replayed).
    """
    built = replayed = 0
    for line in log_text.split("\n"):
        if "✔" not in line:
            continue
        if "Built Mathlib." in line:
            built += 1
        elif "Replayed Mathlib." in line:
            replayed += 1
    return built, replayed
# Step execution order in the Build job (from build_template.yml)
# NOTE(review): this constant is not referenced anywhere else in this script;
# it is kept as documentation of the Build job's step sequence. Confirm
# against build_template.yml before relying on it.
STEP_ORDER = [
    "Set up job",
    "job info",
    "cleanup",
    "Setup jq",
    "Checkout tools branch",
    "Checkout mathlib-ci",
    "Set up CI scripts paths",
    "Checkout PR branch",
    "Prepare DownstreamTest directory",
    "Create empty directories",
    "install elan",
    "set toolchain directory",
    "set LEAN_SRC_PATH",
    "build tools-branch tools",
    "cleanup .cache/mathlib",
    "download dependencies",
    "validate lake-manifest.json inputRevs",
    "validate ProofWidgets source repository",
    "verify ProofWidgets lean-toolchain matches on versioned releases",
    "get cache (1/3 - setup and initial fetch)",
    "get cache (2/3 - test Mathlib.Init cache)",
    "get cache (3/3 - finalize cache operation)",
    "fetch ProofWidgets release",
    "update {Mathlib, Tactic, Counterexamples, Archive}.lean",
    "build mathlib",
    "print the sizes of the oleans",
    "fetch archive and counterexamples cache",
    "build archive",
    "build counterexamples",
    "stage Mathlib cache files",
    "stage Archive cache files",
    "stage Counterexamples cache files",
    "check cache staging contents",
    "upload cache staging artifact",
    "Check {Mathlib, Tactic, Counterexamples, Archive}.lean",
    "test mathlib",
    "lint mathlib",
    "kill stray runLinter processes",
    "check for noisy stdout lines",
]
| def categorize_step(name): | |
| n = name.lower() | |
| if "get cache" in n: | |
| return "Get cache" | |
| if n == "build mathlib": | |
| return "Build Mathlib" | |
| if n == "build archive": | |
| return "Build Archive" | |
| if n == "build counterexamples": | |
| return "Build Counterexamples" | |
| if n == "build everything": | |
| return "Build everything" | |
| if "test mathlib" in n: | |
| return "Run tests" | |
| if "lint mathlib" in n: | |
| return "Run linter" | |
| if "stage" in n and "cache" in n: | |
| return "Cache staging" | |
| if "upload cache" in n: | |
| return "Upload cache" | |
| if "verify" in n or "check cache" in n: | |
| return "Cache verification" | |
| if "import graph" in n: | |
| return "Import graph" | |
| if "check declarations" in n: | |
| return "Check declarations" | |
| if "noisy" in n or "mk_all" in n or "up to date" in n or "forbidden" in n: | |
| return "Post-build checks" | |
| if "fetch archive" in n: | |
| return "Fetch archive/CE cache" | |
| if "update {mathlib" in n: | |
| return "Update .lean files" | |
| if "print the sizes" in n: | |
| return "Print olean sizes" | |
| if "problem-match" in n or "kill stray" in n or "clean up" in n: | |
| return "Misc cleanup" | |
| if any(x in n for x in [ | |
| "checkout", "set up job", "setup", "install", "cleanup", "job info", | |
| "prepare downstream", "download dependencies", "fetch proofwidgets", | |
| "configure lean", "set lean", "set toolchain", "build tools-branch", | |
| "validate", "create empty", | |
| ]): | |
| return "Setup & checkout" | |
| if "complete job" in n or n.startswith("post "): | |
| return "Job teardown" | |
| return "Other" | |
# Groups in execution order
# Display order for the grouped timing summary in analyze_build_job. Groups
# that appear in the data but are missing here are appended alphabetically
# by the caller, so this list need not be exhaustive.
GROUP_ORDER = [
    "Setup & checkout",
    "Get cache",
    "Update .lean files",
    "Build Mathlib",
    "Fetch archive/CE cache",
    "Build Archive",
    "Build Counterexamples",
    "Cache staging",
    "Upload cache",
    "Post-build checks",
    "Run tests",
    "Run linter",
    "Misc cleanup",
    "Print olean sizes",
    "Build everything",
    "Cache verification",
    "Import graph",
    "Check declarations",
    "Job teardown",
    "Other",
]
# Post-Build Step steps in execution order
# Used by analyze_post_build to print known steps in this order before any
# unrecognized steps (which are appended alphabetically).
POST_BUILD_STEP_ORDER = [
    "Configure Lean",
    "get cache for Mathlib",
    "get cache for Archive and Counterexamples",
    "verify that everything was available in the cache",
    "check declarations in db files",
    "generate our import graph",
    "upload the import graph",
    "clean up the import graph file",
    "build everything",
    "build AesopTest (nightly-testing only)",
]
def stats_row(durs, total_mean=None):
    """Summarize a list of durations.

    Returns (mean, min, max, stdev, pct_str). pct_str is the mean expressed
    as a percentage of total_mean (empty string when total_mean is falsy);
    stdev is 0 for a single sample.
    """
    mean_d = statistics.mean(durs)
    spread = statistics.stdev(durs) if len(durs) > 1 else 0
    if total_mean:
        pct = f"{mean_d / total_mean * 100:5.1f}%"
    else:
        pct = ""
    return mean_d, min(durs), max(durs), spread, pct
| def print_table(header, rows, col_widths=None): | |
| """Print a formatted table.""" | |
| if col_widths is None: | |
| col_widths = [max(len(str(row[i])) for row in [header] + rows) for i in range(len(header))] | |
| # Header | |
| hdr = "" | |
| for i, (h, w) in enumerate(zip(header, col_widths)): | |
| if i == 0: | |
| hdr += f"{h:<{w}s}" | |
| else: | |
| hdr += f" {h:>{w}s}" | |
| print(hdr) | |
| print("-" * len(hdr)) | |
| # Rows | |
| for row in rows: | |
| line = "" | |
| for i, (cell, w) in enumerate(zip(row, col_widths)): | |
| if i == 0: | |
| line += f"{str(cell):<{w}s}" | |
| else: | |
| line += f" {str(cell):>{w}s}" | |
| print(line) | |
def analyze_build_job(runs_data, label, build_job_pattern):
    """Analyze the Build job across multiple runs.

    Prints a grouped timing summary (stages in GROUP_ORDER, plus a TOTAL row)
    for the first job whose name matches build_job_pattern. Returns that
    job's name so the caller can reuse it (e.g. for regression_analysis),
    or None when no matching job / no timing data was found.
    """
    # Find the Build job name: first match for the pattern, excluding the
    # similarly named Lint / Post-Build / Upload jobs.
    build_job_key = None
    for rd in runs_data:
        for jname in rd["jobs"]:
            if build_job_pattern in jname and "Lint" not in jname and "Post" not in jname and "Upload" not in jname:
                build_job_key = jname
                break
        if build_job_key:
            break
    if not build_job_key:
        print(f" No Build job found matching '{build_job_pattern}'")
        return
    # Collect per-step and grouped durations
    step_durations = defaultdict(list)   # step name -> [duration per run]
    group_durations = defaultdict(list)  # category  -> [duration per run]
    job_totals = []
    for rd in runs_data:
        if build_job_key not in rd["jobs"]:
            continue
        jdata = rd["jobs"][build_job_key]
        job_totals.append(jdata["total"])
        # Sum step durations per category within each run first, so every
        # run contributes exactly one sample per category.
        run_groups = defaultdict(float)
        for step in jdata["steps"]:
            step_durations[step["name"]].append(step["duration"])
            run_groups[categorize_step(step["name"])] += step["duration"]
        for cat, dur in run_groups.items():
            group_durations[cat].append(dur)
    if not job_totals:
        print(f" No data for {build_job_key}")
        return
    total_mean = statistics.mean(job_totals)
    # NOTE(review): the trailing ':' inside the braces is an empty format
    # spec (a no-op); this prints "\nBuild job (<label>)".
    print(f"\n{'Build job' + ' (' + label + ')':}")
    print(f" Job: {build_job_key}, {len(job_totals)} runs\n")
    # Grouped summary in execution order
    header = ["Stage", "Mean", "Min", "Max", "StdDev", "%"]
    rows = []
    for cat in GROUP_ORDER:
        if cat not in group_durations:
            continue
        durs = group_durations[cat]
        mean_d, min_d, max_d, std_d, pct = stats_row(durs, total_mean)
        # Hide stages averaging under half a second -- pure noise.
        if mean_d < 0.5:
            continue
        rows.append([cat, fmt(mean_d), fmt(min_d), fmt(max_d), fmt(std_d), pct])
    # Add any groups not in GROUP_ORDER
    for cat in sorted(group_durations.keys()):
        if cat not in GROUP_ORDER:
            durs = group_durations[cat]
            mean_d, min_d, max_d, std_d, pct = stats_row(durs, total_mean)
            if mean_d >= 0.5:
                rows.append([cat, fmt(mean_d), fmt(min_d), fmt(max_d), fmt(std_d), pct])
    total_std = statistics.stdev(job_totals) if len(job_totals) > 1 else 0
    rows.append(["TOTAL", fmt(total_mean), fmt(min(job_totals)), fmt(max(job_totals)), fmt(total_std), ""])
    print_table(header, rows, col_widths=[30, 7, 7, 7, 7, 6])
    return build_job_key
def analyze_post_build(runs_data, label):
    """Analyze the Post-Build Step job.

    Prints per-step Mean/Min/Max timings (known steps in execution order,
    then any unexpected steps alphabetically) plus a TOTAL row. Silently
    returns when no matching job or no data exists.
    """
    # Locate the Post-Build job name across all runs.
    job_key = next(
        (jname
         for rd in runs_data
         for jname in rd["jobs"]
         if "Post-Build" in jname or "post_steps" in jname),
        None,
    )
    if not job_key:
        return
    durations_by_step = defaultdict(list)
    totals = []
    for rd in runs_data:
        jdata = rd["jobs"].get(job_key)
        if jdata is None:
            continue
        totals.append(jdata["total"])
        for step in jdata["steps"]:
            durations_by_step[step["name"]].append(step["duration"])
    if not totals:
        return
    print(f"\nPost-Build Step ({label})")
    print(f" Job: {job_key}, {len(totals)} runs\n")
    header = ["Step", "Mean", "Min", "Max"]
    rows = []

    def maybe_add(step_name):
        # Skip sub-half-second steps: they are noise in the report.
        durs = durations_by_step[step_name]
        mean_d = statistics.mean(durs)
        if mean_d >= 0.5:
            rows.append([step_name, fmt(mean_d), fmt(min(durs)), fmt(max(durs))])

    # Known steps first, in execution order...
    ordered = [s for s in POST_BUILD_STEP_ORDER if s in durations_by_step]
    for step_name in ordered:
        maybe_add(step_name)
    # ...then anything unexpected, alphabetically.
    for step_name in sorted(set(durations_by_step) - set(ordered)):
        maybe_add(step_name)
    rows.append(["TOTAL", fmt(statistics.mean(totals)), fmt(min(totals)), fmt(max(totals))])
    print_table(header, rows, col_widths=[55, 7, 7, 7])
def analyze_all_jobs(runs_data, label):
    """Show timing for all jobs, slowest (by mean duration) first."""
    totals_by_job = defaultdict(list)
    for rd in runs_data:
        for jname, jdata in rd["jobs"].items():
            totals_by_job[jname].append(jdata["total"])
    print(f"\nAll jobs ({label})\n")
    rows = []
    # Negated mean => descending sort without reverse=True.
    for jname, durs in sorted(totals_by_job.items(), key=lambda kv: -statistics.mean(kv[1])):
        mean_d, min_d, max_d, std_d, _ = stats_row(durs)
        rows.append([jname, fmt(mean_d), fmt(min_d), fmt(max_d), fmt(std_d)])
    print_table(["Job", "Mean", "Min", "Max", "StdDev"], rows, col_widths=[45, 7, 7, 7, 7])
def regression_analysis(bors_runs_data, build_job_key):
    """Correlate build time with cache misses.

    Downloads the Build job log for each bors run, counts 'Built' (cache
    miss) vs 'Replayed' (cache hit) lines, fits an ordinary-least-squares
    line of build time vs files built, and prints the data table, fit,
    residuals, and an ASCII scatter plot.
    """
    print("\n" + "=" * 80)
    print("BUILD TIME vs CACHE MISSES")
    print("=" * 80)
    # Collect build times from step data
    build_times = {}  # run_id -> 'build mathlib' step duration (seconds)
    for rd in bors_runs_data:
        if build_job_key not in rd["jobs"]:
            continue
        for step in rd["jobs"][build_job_key]["steps"]:
            if step["name"] == "build mathlib":
                build_times[rd["run_id"]] = step["duration"]
                break
    if not build_times:
        print(" No build mathlib step data found.")
        return
    # Fetch logs and count Built/Replayed
    print(f"\n Downloading build logs for {len(build_times)} runs...")
    data_points = []  # (run_id, built, replayed, build_seconds)
    for run_id in build_times:
        job_id = get_build_job_id(run_id)
        if not job_id:
            continue
        sys.stderr.write(f" Fetching log for run {run_id}...")
        log = gh_log(job_id)
        if not log:
            sys.stderr.write(" failed\n")
            continue
        built, replayed = count_built_replayed(log)
        sys.stderr.write(f" {built} built, {replayed} replayed\n")
        if built + replayed > 0:
            data_points.append((run_id, built, replayed, build_times[run_id]))
    # A meaningful fit needs at least 3 points.
    if len(data_points) < 3:
        print(" Not enough data points for regression.")
        return
    # Sort by built count
    data_points.sort(key=lambda x: x[1])
    print(f"\n {len(data_points)} runs with cache data (sorted by files built):\n")
    header = ["Run ID", "Built", "Replayed", "Total", "Miss%", "Build time", "sec/file"]
    rows = []
    x_vals = []  # files built (cache misses)
    y_vals = []  # build time in seconds
    for run_id, built, replayed, secs in data_points:
        total = built + replayed
        miss_pct = f"{built / total * 100:.1f}%"
        spf = f"{secs / built:.2f}s" if built > 0 else "n/a"
        rows.append([str(run_id), str(built), str(replayed), str(total), miss_pct, fmt(secs), spf])
        x_vals.append(built)
        y_vals.append(secs)
    print_table(header, rows, col_widths=[16, 6, 8, 6, 6, 10, 8])
    # Linear regression (ordinary least squares on the sums of squares).
    n = len(x_vals)  # NOTE(review): n is unused below
    x_mean = statistics.mean(x_vals)
    y_mean = statistics.mean(y_vals)
    ss_xy = sum((xi - x_mean) * (yi - y_mean) for xi, yi in zip(x_vals, y_vals))
    ss_xx = sum((xi - x_mean) ** 2 for xi in x_vals)
    if ss_xx == 0:
        print("\n Cannot compute regression (no variance in x).")
        return
    slope = ss_xy / ss_xx
    intercept = y_mean - slope * x_mean
    y_pred = [slope * xi + intercept for xi in x_vals]
    ss_res = sum((yi - ypi) ** 2 for yi, ypi in zip(y_vals, y_pred))
    ss_tot = sum((yi - y_mean) ** 2 for yi in y_vals)
    r_squared = 1 - ss_res / ss_tot if ss_tot > 0 else 0
    # Recover Pearson r from R^2; restore the sign from the covariance.
    r = r_squared ** 0.5 if r_squared >= 0 else 0
    if ss_xy < 0:
        r = -r
    # Non-linearity check: correlate residuals against x^2; a strong
    # correlation suggests the linear model misses curvature.
    x2 = [xi ** 2 for xi in x_vals]
    resids = [yi - ypi for yi, ypi in zip(y_vals, y_pred)]
    resid_mean = statistics.mean(resids)
    x2_mean = statistics.mean(x2)
    ss_rx2 = sum((ri - resid_mean) * (x2i - x2_mean) for ri, x2i in zip(resids, x2))
    ss_rr = sum((ri - resid_mean) ** 2 for ri in resids)
    ss_x2x2 = sum((x2i - x2_mean) ** 2 for x2i in x2)
    if ss_rr > 0 and ss_x2x2 > 0:
        r_resid_x2 = ss_rx2 / (ss_rr * ss_x2x2) ** 0.5
    else:
        r_resid_x2 = 0
    print(f"\n Linear regression: build_time = {slope:.4f} * files_built + {intercept:.1f}")
    print(f" R = {r:.4f}, R-squared = {r_squared:.4f}")
    print(f" {slope:.3f}s per file ({slope * 1000:.1f}s per 1000 files, {slope * 1000 / 60:.1f} min per 1000 files)")
    print(f" Baseline (0 misses): {intercept:.0f}s ({intercept / 60:.1f} min)")
    print(f" Residual correlation with x^2: {r_resid_x2:.3f}", end="")
    if abs(r_resid_x2) > 0.3:
        print(" (some non-linearity)")
    else:
        print(" (linear model adequate)")
    # Residuals table
    print(f"\n Residuals:\n")
    header = ["Built", "Actual", "Predicted", "Residual"]
    rows = []
    for xi, yi, ypi in sorted(zip(x_vals, y_vals, y_pred)):
        resid = yi - ypi
        rows.append([str(xi), fmt(yi), fmt(ypi), f"{resid:+.0f}s"])
    print_table(header, rows, col_widths=[8, 10, 10, 10])
    # ASCII scatter plot: map x onto [0, width) columns and y onto
    # [0, height) rows (row 0 = top, hence the height-1-... flip).
    print(f"\n Scatter: files built (x) vs build time in minutes (y)\n")
    width = 65
    height = 18
    x_min, x_max = min(x_vals), max(x_vals)
    y_max_v = max(y_vals)
    x_range = x_max - x_min or 1  # avoid div-by-zero when all x equal
    grid = [[' ' for _ in range(width)] for _ in range(height)]
    for xi, yi in zip(x_vals, y_vals):
        col = int((xi - x_min) / x_range * (width - 1))
        row = height - 1 - int(yi / (y_max_v + 1) * (height - 1))
        row = max(0, min(height - 1, row))
        col = max(0, min(width - 1, col))
        grid[row][col] = '*'
    # Overlay the fitted line with '.', never overwriting data points.
    for col in range(width):
        xi = x_min + col / (width - 1) * x_range
        yi = slope * xi + intercept
        row = height - 1 - int(yi / (y_max_v + 1) * (height - 1))
        row = max(0, min(height - 1, row))
        if grid[row][col] == ' ':
            grid[row][col] = '.'
    for row_idx in range(height):
        y_val = (height - 1 - row_idx) / (height - 1) * y_max_v
        label = f"{y_val / 60:5.1f}m"
        print(f" {label} |{''.join(grid[row_idx])}|")
    print(f" +{'-' * width}+")
    print(f" {x_min:<10d}{' ' * (width - 20)}{x_max:>10d}")
    print(f" {'files built (cache misses)':^{width}s}")
    print(f"\n * = data point, . = regression line")
def main():
    """Entry point: fetch CI run data via the gh CLI and print the report.

    Progress messages go to stderr; the report itself goes to stdout, so the
    output can be redirected cleanly.
    """
    parser = argparse.ArgumentParser(description="Mathlib CI timing report")
    parser.add_argument("--runs", type=int, default=20,
                        help="Number of bors runs to analyze (default: 20)")
    parser.add_argument("--master-runs", type=int, default=6,
                        help="Number of master runs to analyze (default: 6)")
    parser.add_argument("--no-regression", action="store_true",
                        help="Skip the regression analysis (avoids downloading build logs)")
    args = parser.parse_args()
    print("=" * 80)
    print("MATHLIB CI TIMING REPORT")
    print("=" * 80)
    # Fetch run lists
    sys.stderr.write(f"Fetching {args.runs} bors runs and {args.master_runs} master runs...\n")
    bors_runs = list_bors_runs(args.runs)
    master_runs = list_master_runs(args.master_runs)
    sys.stderr.write(f" Found {len(bors_runs)} bors runs, {len(master_runs)} master runs\n")
    # Fetch job details (one gh API call per run; failures are skipped)
    sys.stderr.write("Fetching job details...\n")
    bors_data = []
    for r in bors_runs:
        sys.stderr.write(f" Run {r['databaseId']}...")
        jobs = fetch_jobs(r["databaseId"])
        if jobs:
            bors_data.append({"run_id": r["databaseId"], "title": r["displayTitle"], "jobs": jobs})
            sys.stderr.write(f" {len(jobs)} jobs\n")
        else:
            sys.stderr.write(" failed\n")
    master_data = []
    for r in master_runs:
        sys.stderr.write(f" Run {r['databaseId']}...")
        jobs = fetch_jobs(r["databaseId"])
        if jobs:
            master_data.append({"run_id": r["databaseId"], "title": r["displayTitle"], "jobs": jobs})
            sys.stderr.write(f" {len(jobs)} jobs\n")
        else:
            sys.stderr.write(" failed\n")
    # Job dependency graph
    print("""
Job dependency graph:
style_lint ──────────────────────────────────────────────┐
build ──────────┬────────────────────────────────────────┤
├── upload_cache ──┬── post_steps ───────┤
│ │ │
└──────────────────┘ final (post-ci)
style_lint and build start in parallel. Everything else chains off build.
""")
    # Analyze bors runs (pre-merge CI: variable cache hit rates)
    print("=" * 80)
    print(f"BORS (STAGING) RUNS — pre-merge CI ({len(bors_data)} runs)")
    print("=" * 80)
    bors_build_key = analyze_build_job(bors_data, "bors", "Build")
    analyze_post_build(bors_data, "bors")
    analyze_all_jobs(bors_data, "bors")
    # Analyze master runs (push builds: expected full cache hit)
    print("\n" + "=" * 80)
    print(f"MASTER PUSH RUNS — full cache hit ({len(master_data)} runs)")
    print("=" * 80)
    analyze_build_job(master_data, "master", "Build")
    analyze_post_build(master_data, "master")
    analyze_all_jobs(master_data, "master")
    # Regression analysis (only meaningful with bors data and a Build job key)
    if not args.no_regression and bors_build_key and bors_data:
        regression_analysis(bors_data, bors_build_key)
    print()


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ================================================================================ | |
| MATHLIB CI TIMING REPORT | |
| ================================================================================ | |
| Job dependency graph: | |
| style_lint ──────────────────────────────────────────────┐ | |
| build ──────────┬────────────────────────────────────────┤ | |
| ├── upload_cache ──┬── post_steps ───────┤ | |
| │ │ │ | |
| └──────────────────┘ final (post-ci) | |
| style_lint and build start in parallel. Everything else chains off build. | |
| ================================================================================ | |
| BORS (STAGING) RUNS — pre-merge CI (20 runs) | |
| ================================================================================ | |
| Build job (bors) | |
| Job: ci (staging) / Build, 20 runs | |
| Stage Mean Min Max StdDev % | |
| -------------------------------------------------------------------------- | |
| Setup & checkout 0m48s 0m43s 0m55s 0m02s 2.8% | |
| Get cache 0m16s 0m14s 0m21s 0m02s 1.0% | |
| Update .lean files 0m09s 0m09s 0m12s 0m00s 0.5% | |
| Build Mathlib 18m59s 0m09s 37m20s 13m15s 65.9% | |
| Fetch archive/CE cache 0m04s 0m03s 0m08s 0m01s 0.3% | |
| Build Archive 0m15s 0m02s 0m29s 0m09s 0.9% | |
| Build Counterexamples 0m13s 0m02s 0m20s 0m06s 0.8% | |
| Cache staging 2m05s 0m09s 4m54s 1m33s 7.2% | |
| Upload cache 0m04s 0m00s 0m09s 0m02s 0.3% | |
| Post-build checks 0m03s 0m02s 0m04s 0m00s 0.2% | |
| Run tests 1m36s 1m21s 2m04s 0m15s 5.6% | |
| Run linter 4m09s 3m55s 4m45s 0m10s 14.4% | |
| Misc cleanup 0m00s 0m00s 0m01s 0m00s 0.0% | |
| TOTAL 28m49s 7m29s 50m10s 15m05s | |
| Post-Build Step (bors) | |
| Job: ci (staging) / Post-Build Step, 20 runs | |
| Step Mean Min Max | |
| ---------------------------------------------------------------------------------- | |
| Configure Lean 0m10s 0m10s 0m12s | |
| get cache for Mathlib 0m51s 0m47s 1m02s | |
| get cache for Archive and Counterexamples 0m09s 0m07s 0m11s | |
| verify that everything was available in the cache 0m13s 0m13s 0m16s | |
| check declarations in db files 0m27s 0m23s 0m30s | |
| generate our import graph 0m27s 0m24s 0m30s | |
| upload the import graph 0m00s 0m00s 0m02s | |
| build everything 0m04s 0m04s 0m05s | |
| Post Run actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd 0m00s 0m00s 0m01s | |
| Run actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd 0m09s 0m07s 0m20s | |
| Set up job 0m01s 0m01s 0m03s | |
| TOTAL 2m40s 2m24s 3m00s | |
| All jobs (bors) | |
| Job Mean Min Max StdDev | |
| --------------------------------------------------------------------------------- | |
| ci (staging) / Build 28m49s 7m29s 50m10s 15m05s | |
| ci (staging) / Post-Build Step 2m40s 2m24s 3m00s 0m09s | |
| ci (staging) / Lint style 2m03s 1m45s 2m28s 0m08s | |
| ci (staging) / Upload to cache 0m50s 0m39s 1m06s 0m07s | |
| ci (staging) / Post-CI job 0m08s 0m05s 0m11s 0m01s | |
| ================================================================================ | |
| MASTER PUSH RUNS — full cache hit (6 runs) | |
| ================================================================================ | |
| Build job (master) | |
| Job: ci / Build, 6 runs | |
| Stage Mean Min Max StdDev % | |
| -------------------------------------------------------------------------- | |
| Setup & checkout 0m47s 0m41s 0m50s 0m03s 10.9% | |
| Get cache 0m14s 0m14s 0m16s 0m00s 3.4% | |
| Update .lean files 0m09s 0m09s 0m10s 0m00s 2.1% | |
| Build Mathlib 0m05s 0m04s 0m07s 0m01s 1.3% | |
| Fetch archive/CE cache 0m03s 0m03s 0m04s 0m00s 0.8% | |
| Build Archive 0m02s 0m02s 0m03s 0m00s 0.5% | |
| Build Counterexamples 0m02s 0m02s 0m03s 0m00s 0.5% | |
| Cache staging 0m09s 0m07s 0m12s 0m02s 2.2% | |
| Post-build checks 0m03s 0m03s 0m04s 0m00s 0.7% | |
| Run tests 1m27s 1m21s 1m47s 0m09s 20.3% | |
| Run linter 4m03s 3m53s 4m08s 0m05s 56.5% | |
| TOTAL 7m11s 6m49s 7m42s 0m17s | |
| Post-Build Step (master) | |
| Job: ci / Post-Build Step, 6 runs | |
| Step Mean Min Max | |
| ---------------------------------------------------------------------------------- | |
| Configure Lean 0m10s 0m09s 0m12s | |
| get cache for Mathlib 0m48s 0m46s 0m51s | |
| get cache for Archive and Counterexamples 0m08s 0m07s 0m10s | |
| verify that everything was available in the cache 0m13s 0m13s 0m14s | |
| check declarations in db files 0m26s 0m24s 0m28s | |
| generate our import graph 0m26s 0m23s 0m28s | |
| upload the import graph 0m01s 0m00s 0m02s | |
| build everything 0m04s 0m04s 0m05s | |
| Post Run actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd 0m00s 0m00s 0m01s | |
| Run actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd 0m08s 0m08s 0m09s | |
| Set up job 0m02s 0m01s 0m03s | |
| TOTAL 2m33s 2m22s 2m42s | |
| All jobs (master) | |
| Job Mean Min Max StdDev | |
| --------------------------------------------------------------------------------- | |
| ci / Build 7m11s 6m49s 7m42s 0m17s | |
| ci / Post-Build Step 2m33s 2m22s 2m42s 0m07s | |
| ci / Lint style 2m01s 1m55s 2m10s 0m06s | |
| ci / Post-CI job 0m09s 0m07s 0m11s 0m01s | |
| ci / Upload to cache 0m00s 0m00s 0m00s 0m00s | |
| ================================================================================ | |
| BUILD TIME vs CACHE MISSES | |
| ================================================================================ | |
| Downloading build logs for 20 runs... | |
| 20 runs with cache data (sorted by files built): | |
| Run ID Built Replayed Total Miss% Build time sec/file | |
| ------------------------------------------------------------------------ | |
| 22775816611 101 14504 14605 0.7% 0m09s 0.09s | |
| 22783869343 103 14502 14605 0.7% 0m15s 0.15s | |
| 22775422433 108 14496 14604 0.7% 0m22s 0.20s | |
| 22784728370 201 14403 14604 1.4% 2m11s 0.65s | |
| 22785415984 299 14306 14605 2.0% 3m33s 0.71s | |
| 22777674484 572 13821 14393 4.0% 4m56s 0.52s | |
| 22778844388 1695 12135 13830 12.3% 13m47s 0.49s | |
| 22782831772 2269 11147 13416 16.9% 16m55s 0.45s | |
| 22773803465 2377 11369 13746 17.3% 16m06s 0.41s | |
| 22756917363 2532 10469 13001 19.5% 17m19s 0.41s | |
| 22781597975 3337 9049 12386 26.9% 22m36s 0.41s | |
| 22762048381 3610 8559 12169 29.7% 24m37s 0.41s | |
| 22785974094 3905 8002 11907 32.8% 26m23s 0.41s | |
| 22768512555 4503 7428 11931 37.7% 28m35s 0.38s | |
| 22770264518 4599 7222 11821 38.9% 28m46s 0.38s | |
| 22764829579 5715 4921 10636 53.7% 34m14s 0.36s | |
| 22763438045 5766 4819 10585 54.5% 33m11s 0.35s | |
| 22771976575 5849 4708 10557 55.4% 32m20s 0.33s | |
| 22779837246 6345 3735 10080 62.9% 36m08s 0.34s | |
| 22758002567 6868 2489 9357 73.4% 37m20s 0.33s | |
| Linear regression: build_time = 0.3361 * files_built + 118.2 | |
| R = 0.9900, R-squared = 0.9800 | |
| 0.336s per file (336.1s per 1000 files, 5.6 min per 1000 files) | |
| Baseline (0 misses): 118s (2.0 min) | |
| Residual correlation with x^2: -0.256 (linear model adequate) | |
| Residuals: | |
| Built Actual Predicted Residual | |
| -------------------------------------------- | |
| 101 0m09s 2m32s -143s | |
| 103 0m15s 2m32s -138s | |
| 108 0m22s 2m34s -132s | |
| 201 2m11s 3m05s -55s | |
| 299 3m33s 3m38s -6s | |
| 572 4m56s 5m10s -14s | |
| 1695 13m47s 11m27s +139s | |
| 2269 16m55s 14m40s +134s | |
| 2377 16m06s 15m17s +49s | |
| 2532 17m19s 16m09s +70s | |
| 3337 22m36s 20m39s +116s | |
| 3610 24m37s 22m11s +145s | |
| 3905 26m23s 23m50s +152s | |
| 4503 28m35s 27m11s +83s | |
| 4599 28m46s 27m43s +62s | |
| 5715 34m14s 33m59s +15s | |
| 5766 33m11s 34m16s -65s | |
| 5849 32m20s 34m44s -144s | |
| 6345 36m08s 37m30s -83s | |
| 6868 37m20s 40m26s -187s | |
| Scatter: files built (x) vs build time in minutes (y) | |
| 37.3m | ......| | |
| 35.1m | ...* *| | |
| 32.9m | .*.. | | |
| 30.7m | .... * | | |
| 28.5m | ** .... | | |
| 26.4m | * ... | | |
| 24.2m | * .... | | |
| 22.0m | * .... | | |
| 19.8m | ... | | |
| 17.6m | .... | | |
| 15.4m | ***... | | |
| 13.2m | * .... | | |
| 11.0m | ... | | |
| 8.8m | .... | | |
| 6.6m | .... | | |
| 4.4m | *.. | | |
| 2.2m |.*.. | | |
| 0.0m |* | | |
| +-----------------------------------------------------------------+ | |
| 101 6868 | |
| files built (cache misses) | |
| * = data point, . = regression line | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment