Created
March 7, 2026 00:25
-
-
Save kim-em/6b8de5618dbdbb679b6f62a1c44dde4c to your computer and use it in GitHub Desktop.
Mathlib CI timing report script and sample output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Mathlib CI timing report. | |
| Fetches recent CI run data from GitHub Actions and produces a report showing: | |
| - Per-step timing for the Build job (in execution order) | |
| - Grouped timing summary | |
| - All jobs overview | |
| - Linear regression of build time vs cache misses | |
| Usage: | |
| python3 scripts/ci_timing_report.py [--runs N] | |
| Requires: gh CLI authenticated with access to leanprover-community/mathlib4 | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| import statistics | |
| import subprocess | |
| import sys | |
| import tempfile | |
| from collections import defaultdict | |
| from datetime import datetime | |
| REPO = "leanprover-community/mathlib4" | |
def parse_time(t):
    """Parse a GitHub-style ISO-8601 timestamp (with 'Z' suffix) into a datetime.

    Returns None for empty/missing input.
    """
    if not t:
        return None
    # Older Pythons' fromisoformat rejects a literal 'Z'; normalize it to an
    # explicit UTC offset first.
    normalized = t.replace("Z", "+00:00")
    return datetime.fromisoformat(normalized)
def duration_seconds(start, end):
    """Seconds elapsed between two ISO timestamps; 0 if either is missing.

    Negative spans (clock skew / bad data) are clamped to 0.
    """
    if not start or not end:
        return 0
    started, finished = parse_time(start), parse_time(end)
    elapsed = (finished - started).total_seconds()
    return elapsed if elapsed > 0 else 0
def fmt(secs):
    """Format a duration in seconds as 'XmYYs' (e.g. 125 -> '2m05s')."""
    total = int(secs)
    return f"{total // 60}m{total % 60:02d}s"
def gh(*args, jq=None):
    """Run a gh CLI command; return parsed JSON, raw text, or None on failure.

    None is returned when the command fails or produces no output. Output
    that is not valid JSON (e.g. a bare scalar from a --jq filter) is
    returned as the raw string.
    """
    cmd = ["gh", *args]
    if jq:
        cmd += ["--jq", jq]
    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    out = proc.stdout.strip()
    if proc.returncode != 0 or not out:
        return None
    try:
        return json.loads(out)
    except json.JSONDecodeError:
        # Not JSON -- hand the caller the raw text instead.
        return out
def gh_log(job_id):
    """Download the raw log text for a job via the GitHub API; '' on failure."""
    proc = subprocess.run(
        ["gh", "api", f"repos/{REPO}/actions/jobs/{job_id}/logs"],
        capture_output=True, text=True, timeout=120,
    )
    return proc.stdout if proc.returncode == 0 else ""
def list_bors_runs(n):
    """Return up to n recent *successful* bors (staging) workflow runs."""
    runs = gh(
        "run", "list", "--repo", REPO, "--workflow", "bors.yml",
        # Over-fetch so that filtering out failed runs still leaves n.
        "--limit", str(n * 2),
        "--json", "databaseId,status,conclusion,displayTitle",
    )
    successes = [r for r in (runs or []) if r["conclusion"] == "success"]
    return successes[:n]
def list_master_runs(n):
    """Return up to n recent successful push-triggered runs on master."""
    runs = gh(
        "run", "list", "--repo", REPO, "--workflow", "build.yml",
        # Over-fetch so that filtering still leaves n.
        "--limit", str(n * 2),
        "--json", "databaseId,status,conclusion,displayTitle,headBranch,event",
    )

    def wanted(r):
        # Only successful pushes to master count as "full cache hit" runs.
        return (r["conclusion"] == "success"
                and r["headBranch"] == "master"
                and r["event"] == "push")

    return [r for r in (runs or []) if wanted(r)][:n]
def fetch_jobs(run_id):
    """Fetch all jobs for a run with their step timings.

    Returns a mapping
        {job_name: {"total": seconds, "steps": [{"name": str, "duration": secs}]}}
    or {} when the API call fails or returns something unexpected.
    """
    data = gh("api", f"repos/{REPO}/actions/runs/{run_id}/jobs", "--paginate")
    # `gh` may hand back a raw *string* rather than a dict: `gh api --paginate`
    # emits one JSON document per page, and the concatenation is not valid
    # JSON, so gh() falls back to returning the text. Guard so we don't crash
    # on .get() below; a missing run yields an empty mapping instead.
    if not isinstance(data, dict):
        return {}
    jobs = {}
    for job in data.get("jobs", []):
        steps = [
            {"name": step["name"],
             "duration": duration_seconds(step["started_at"], step["completed_at"])}
            for step in job.get("steps", [])
        ]
        jobs[job["name"]] = {
            "total": duration_seconds(job["started_at"], job["completed_at"]),
            "steps": steps,
        }
    return jobs
def get_build_job_id(run_id):
    """Return the numeric ID of the Build job in a run, or None if not found."""
    raw = gh("api", f"repos/{REPO}/actions/runs/{run_id}/jobs",
             jq='.jobs[] | select(.name | test("^ci.*Build$")) | .id')
    # `raw` may be None (API failure), an int, or a string; int() + the
    # except clause covers all three uniformly.
    try:
        return int(raw)
    except (ValueError, TypeError):
        return None
def count_built_replayed(log_text):
    """Count successful 'Built Mathlib.*' / 'Replayed Mathlib.*' log lines.

    Only lines carrying the success check-mark are counted; a line matching
    both patterns counts as built. Returns (built, replayed).
    """
    built = replayed = 0
    for line in log_text.split("\n"):
        if "✔" not in line:
            continue
        if "Built Mathlib." in line:
            built += 1
        elif "Replayed Mathlib." in line:
            replayed += 1
    return built, replayed
# Step execution order in the Build job (from build_template.yml)
# NOTE(review): this constant is not referenced anywhere else in this script;
# it is kept as documentation of the Build job's step sequence. Confirm
# against build_template.yml before relying on it.
STEP_ORDER = [
    "Set up job",
    "job info",
    "cleanup",
    "Setup jq",
    "Checkout tools branch",
    "Checkout mathlib-ci",
    "Set up CI scripts paths",
    "Checkout PR branch",
    "Prepare DownstreamTest directory",
    "Create empty directories",
    "install elan",
    "set toolchain directory",
    "set LEAN_SRC_PATH",
    "build tools-branch tools",
    "cleanup .cache/mathlib",
    "download dependencies",
    "validate lake-manifest.json inputRevs",
    "validate ProofWidgets source repository",
    "verify ProofWidgets lean-toolchain matches on versioned releases",
    "get cache (1/3 - setup and initial fetch)",
    "get cache (2/3 - test Mathlib.Init cache)",
    "get cache (3/3 - finalize cache operation)",
    "fetch ProofWidgets release",
    "update {Mathlib, Tactic, Counterexamples, Archive}.lean",
    "build mathlib",
    "print the sizes of the oleans",
    "fetch archive and counterexamples cache",
    "build archive",
    "build counterexamples",
    "stage Mathlib cache files",
    "stage Archive cache files",
    "stage Counterexamples cache files",
    "check cache staging contents",
    "upload cache staging artifact",
    "Check {Mathlib, Tactic, Counterexamples, Archive}.lean",
    "test mathlib",
    "lint mathlib",
    "kill stray runLinter processes",
    "check for noisy stdout lines",
]
| def categorize_step(name): | |
| n = name.lower() | |
| if "get cache" in n: | |
| return "Get cache" | |
| if n == "build mathlib": | |
| return "Build Mathlib" | |
| if n == "build archive": | |
| return "Build Archive" | |
| if n == "build counterexamples": | |
| return "Build Counterexamples" | |
| if n == "build everything": | |
| return "Build everything" | |
| if "test mathlib" in n: | |
| return "Run tests" | |
| if "lint mathlib" in n: | |
| return "Run linter" | |
| if "stage" in n and "cache" in n: | |
| return "Cache staging" | |
| if "upload cache" in n: | |
| return "Upload cache" | |
| if "verify" in n or "check cache" in n: | |
| return "Cache verification" | |
| if "import graph" in n: | |
| return "Import graph" | |
| if "check declarations" in n: | |
| return "Check declarations" | |
| if "noisy" in n or "mk_all" in n or "up to date" in n or "forbidden" in n: | |
| return "Post-build checks" | |
| if "fetch archive" in n: | |
| return "Fetch archive/CE cache" | |
| if "update {mathlib" in n: | |
| return "Update .lean files" | |
| if "print the sizes" in n: | |
| return "Print olean sizes" | |
| if "problem-match" in n or "kill stray" in n or "clean up" in n: | |
| return "Misc cleanup" | |
| if any(x in n for x in [ | |
| "checkout", "set up job", "setup", "install", "cleanup", "job info", | |
| "prepare downstream", "download dependencies", "fetch proofwidgets", | |
| "configure lean", "set lean", "set toolchain", "build tools-branch", | |
| "validate", "create empty", | |
| ]): | |
| return "Setup & checkout" | |
| if "complete job" in n or n.startswith("post "): | |
| return "Job teardown" | |
| return "Other" | |
# Groups in execution order
# Display order for the grouped timing summary in analyze_build_job. Groups
# that appear in the data but are missing here are appended alphabetically
# by the caller, so this list need not be exhaustive.
GROUP_ORDER = [
    "Setup & checkout",
    "Get cache",
    "Update .lean files",
    "Build Mathlib",
    "Fetch archive/CE cache",
    "Build Archive",
    "Build Counterexamples",
    "Cache staging",
    "Upload cache",
    "Post-build checks",
    "Run tests",
    "Run linter",
    "Misc cleanup",
    "Print olean sizes",
    "Build everything",
    "Cache verification",
    "Import graph",
    "Check declarations",
    "Job teardown",
    "Other",
]
# Post-Build Step steps in execution order
# Used by analyze_post_build to print known steps in this order before any
# unrecognized steps (which are appended alphabetically).
POST_BUILD_STEP_ORDER = [
    "Configure Lean",
    "get cache for Mathlib",
    "get cache for Archive and Counterexamples",
    "verify that everything was available in the cache",
    "check declarations in db files",
    "generate our import graph",
    "upload the import graph",
    "clean up the import graph file",
    "build everything",
    "build AesopTest (nightly-testing only)",
]
def stats_row(durs, total_mean=None):
    """Summarize a list of durations.

    Returns (mean, min, max, stdev, pct_str). pct_str is the mean expressed
    as a percentage of total_mean (empty string when total_mean is falsy);
    stdev is 0 for a single sample.
    """
    mean_d = statistics.mean(durs)
    spread = statistics.stdev(durs) if len(durs) > 1 else 0
    if total_mean:
        pct = f"{mean_d / total_mean * 100:5.1f}%"
    else:
        pct = ""
    return mean_d, min(durs), max(durs), spread, pct
| def print_table(header, rows, col_widths=None): | |
| """Print a formatted table.""" | |
| if col_widths is None: | |
| col_widths = [max(len(str(row[i])) for row in [header] + rows) for i in range(len(header))] | |
| # Header | |
| hdr = "" | |
| for i, (h, w) in enumerate(zip(header, col_widths)): | |
| if i == 0: | |
| hdr += f"{h:<{w}s}" | |
| else: | |
| hdr += f" {h:>{w}s}" | |
| print(hdr) | |
| print("-" * len(hdr)) | |
| # Rows | |
| for row in rows: | |
| line = "" | |
| for i, (cell, w) in enumerate(zip(row, col_widths)): | |
| if i == 0: | |
| line += f"{str(cell):<{w}s}" | |
| else: | |
| line += f" {str(cell):>{w}s}" | |
| print(line) | |
def analyze_build_job(runs_data, label, build_job_pattern):
    """Analyze the Build job across multiple runs.

    Prints a grouped timing summary (stages in GROUP_ORDER, plus a TOTAL row)
    for the first job whose name matches build_job_pattern. Returns that
    job's name so the caller can reuse it (e.g. for regression_analysis),
    or None when no matching job / no timing data was found.
    """
    # Find the Build job name: first match for the pattern, excluding the
    # similarly named Lint / Post-Build / Upload jobs.
    build_job_key = None
    for rd in runs_data:
        for jname in rd["jobs"]:
            if build_job_pattern in jname and "Lint" not in jname and "Post" not in jname and "Upload" not in jname:
                build_job_key = jname
                break
        if build_job_key:
            break
    if not build_job_key:
        print(f" No Build job found matching '{build_job_pattern}'")
        return
    # Collect per-step and grouped durations
    step_durations = defaultdict(list)   # step name -> [duration per run]
    group_durations = defaultdict(list)  # category  -> [duration per run]
    job_totals = []
    for rd in runs_data:
        if build_job_key not in rd["jobs"]:
            continue
        jdata = rd["jobs"][build_job_key]
        job_totals.append(jdata["total"])
        # Sum step durations per category within each run first, so every
        # run contributes exactly one sample per category.
        run_groups = defaultdict(float)
        for step in jdata["steps"]:
            step_durations[step["name"]].append(step["duration"])
            run_groups[categorize_step(step["name"])] += step["duration"]
        for cat, dur in run_groups.items():
            group_durations[cat].append(dur)
    if not job_totals:
        print(f" No data for {build_job_key}")
        return
    total_mean = statistics.mean(job_totals)
    # NOTE(review): the trailing ':' inside the braces is an empty format
    # spec (a no-op); this prints "\nBuild job (<label>)".
    print(f"\n{'Build job' + ' (' + label + ')':}")
    print(f" Job: {build_job_key}, {len(job_totals)} runs\n")
    # Grouped summary in execution order
    header = ["Stage", "Mean", "Min", "Max", "StdDev", "%"]
    rows = []
    for cat in GROUP_ORDER:
        if cat not in group_durations:
            continue
        durs = group_durations[cat]
        mean_d, min_d, max_d, std_d, pct = stats_row(durs, total_mean)
        # Hide stages averaging under half a second -- pure noise.
        if mean_d < 0.5:
            continue
        rows.append([cat, fmt(mean_d), fmt(min_d), fmt(max_d), fmt(std_d), pct])
    # Add any groups not in GROUP_ORDER
    for cat in sorted(group_durations.keys()):
        if cat not in GROUP_ORDER:
            durs = group_durations[cat]
            mean_d, min_d, max_d, std_d, pct = stats_row(durs, total_mean)
            if mean_d >= 0.5:
                rows.append([cat, fmt(mean_d), fmt(min_d), fmt(max_d), fmt(std_d), pct])
    total_std = statistics.stdev(job_totals) if len(job_totals) > 1 else 0
    rows.append(["TOTAL", fmt(total_mean), fmt(min(job_totals)), fmt(max(job_totals)), fmt(total_std), ""])
    print_table(header, rows, col_widths=[30, 7, 7, 7, 7, 6])
    return build_job_key
def analyze_post_build(runs_data, label):
    """Analyze the Post-Build Step job.

    Prints per-step Mean/Min/Max timings (known steps in execution order,
    then any unexpected steps alphabetically) plus a TOTAL row. Silently
    returns when no matching job or no data exists.
    """
    # Locate the Post-Build job name across all runs.
    job_key = next(
        (jname
         for rd in runs_data
         for jname in rd["jobs"]
         if "Post-Build" in jname or "post_steps" in jname),
        None,
    )
    if not job_key:
        return
    durations_by_step = defaultdict(list)
    totals = []
    for rd in runs_data:
        jdata = rd["jobs"].get(job_key)
        if jdata is None:
            continue
        totals.append(jdata["total"])
        for step in jdata["steps"]:
            durations_by_step[step["name"]].append(step["duration"])
    if not totals:
        return
    print(f"\nPost-Build Step ({label})")
    print(f" Job: {job_key}, {len(totals)} runs\n")
    header = ["Step", "Mean", "Min", "Max"]
    rows = []

    def maybe_add(step_name):
        # Skip sub-half-second steps: they are noise in the report.
        durs = durations_by_step[step_name]
        mean_d = statistics.mean(durs)
        if mean_d >= 0.5:
            rows.append([step_name, fmt(mean_d), fmt(min(durs)), fmt(max(durs))])

    # Known steps first, in execution order...
    ordered = [s for s in POST_BUILD_STEP_ORDER if s in durations_by_step]
    for step_name in ordered:
        maybe_add(step_name)
    # ...then anything unexpected, alphabetically.
    for step_name in sorted(set(durations_by_step) - set(ordered)):
        maybe_add(step_name)
    rows.append(["TOTAL", fmt(statistics.mean(totals)), fmt(min(totals)), fmt(max(totals))])
    print_table(header, rows, col_widths=[55, 7, 7, 7])
def analyze_all_jobs(runs_data, label):
    """Show timing for all jobs, slowest (by mean duration) first."""
    totals_by_job = defaultdict(list)
    for rd in runs_data:
        for jname, jdata in rd["jobs"].items():
            totals_by_job[jname].append(jdata["total"])
    print(f"\nAll jobs ({label})\n")
    rows = []
    # Negated mean => descending sort without reverse=True.
    for jname, durs in sorted(totals_by_job.items(), key=lambda kv: -statistics.mean(kv[1])):
        mean_d, min_d, max_d, std_d, _ = stats_row(durs)
        rows.append([jname, fmt(mean_d), fmt(min_d), fmt(max_d), fmt(std_d)])
    print_table(["Job", "Mean", "Min", "Max", "StdDev"], rows, col_widths=[45, 7, 7, 7, 7])
def regression_analysis(bors_runs_data, build_job_key):
    """Correlate build time with cache misses.

    Downloads the Build job log for each bors run, counts 'Built' (cache
    miss) vs 'Replayed' (cache hit) lines, fits an ordinary-least-squares
    line of build time vs files built, and prints the data table, fit,
    residuals, and an ASCII scatter plot.
    """
    print("\n" + "=" * 80)
    print("BUILD TIME vs CACHE MISSES")
    print("=" * 80)
    # Collect build times from step data
    build_times = {}  # run_id -> 'build mathlib' step duration (seconds)
    for rd in bors_runs_data:
        if build_job_key not in rd["jobs"]:
            continue
        for step in rd["jobs"][build_job_key]["steps"]:
            if step["name"] == "build mathlib":
                build_times[rd["run_id"]] = step["duration"]
                break
    if not build_times:
        print(" No build mathlib step data found.")
        return
    # Fetch logs and count Built/Replayed
    print(f"\n Downloading build logs for {len(build_times)} runs...")
    data_points = []  # (run_id, built, replayed, build_seconds)
    for run_id in build_times:
        job_id = get_build_job_id(run_id)
        if not job_id:
            continue
        sys.stderr.write(f" Fetching log for run {run_id}...")
        log = gh_log(job_id)
        if not log:
            sys.stderr.write(" failed\n")
            continue
        built, replayed = count_built_replayed(log)
        sys.stderr.write(f" {built} built, {replayed} replayed\n")
        if built + replayed > 0:
            data_points.append((run_id, built, replayed, build_times[run_id]))
    # A meaningful fit needs at least 3 points.
    if len(data_points) < 3:
        print(" Not enough data points for regression.")
        return
    # Sort by built count
    data_points.sort(key=lambda x: x[1])
    print(f"\n {len(data_points)} runs with cache data (sorted by files built):\n")
    header = ["Run ID", "Built", "Replayed", "Total", "Miss%", "Build time", "sec/file"]
    rows = []
    x_vals = []  # files built (cache misses)
    y_vals = []  # build time in seconds
    for run_id, built, replayed, secs in data_points:
        total = built + replayed
        miss_pct = f"{built / total * 100:.1f}%"
        spf = f"{secs / built:.2f}s" if built > 0 else "n/a"
        rows.append([str(run_id), str(built), str(replayed), str(total), miss_pct, fmt(secs), spf])
        x_vals.append(built)
        y_vals.append(secs)
    print_table(header, rows, col_widths=[16, 6, 8, 6, 6, 10, 8])
    # Linear regression (ordinary least squares on the sums of squares).
    n = len(x_vals)  # NOTE(review): n is unused below
    x_mean = statistics.mean(x_vals)
    y_mean = statistics.mean(y_vals)
    ss_xy = sum((xi - x_mean) * (yi - y_mean) for xi, yi in zip(x_vals, y_vals))
    ss_xx = sum((xi - x_mean) ** 2 for xi in x_vals)
    if ss_xx == 0:
        print("\n Cannot compute regression (no variance in x).")
        return
    slope = ss_xy / ss_xx
    intercept = y_mean - slope * x_mean
    y_pred = [slope * xi + intercept for xi in x_vals]
    ss_res = sum((yi - ypi) ** 2 for yi, ypi in zip(y_vals, y_pred))
    ss_tot = sum((yi - y_mean) ** 2 for yi in y_vals)
    r_squared = 1 - ss_res / ss_tot if ss_tot > 0 else 0
    # Recover Pearson r from R^2; restore the sign from the covariance.
    r = r_squared ** 0.5 if r_squared >= 0 else 0
    if ss_xy < 0:
        r = -r
    # Non-linearity check: correlate residuals against x^2; a strong
    # correlation suggests the linear model misses curvature.
    x2 = [xi ** 2 for xi in x_vals]
    resids = [yi - ypi for yi, ypi in zip(y_vals, y_pred)]
    resid_mean = statistics.mean(resids)
    x2_mean = statistics.mean(x2)
    ss_rx2 = sum((ri - resid_mean) * (x2i - x2_mean) for ri, x2i in zip(resids, x2))
    ss_rr = sum((ri - resid_mean) ** 2 for ri in resids)
    ss_x2x2 = sum((x2i - x2_mean) ** 2 for x2i in x2)
    if ss_rr > 0 and ss_x2x2 > 0:
        r_resid_x2 = ss_rx2 / (ss_rr * ss_x2x2) ** 0.5
    else:
        r_resid_x2 = 0
    print(f"\n Linear regression: build_time = {slope:.4f} * files_built + {intercept:.1f}")
    print(f" R = {r:.4f}, R-squared = {r_squared:.4f}")
    print(f" {slope:.3f}s per file ({slope * 1000:.1f}s per 1000 files, {slope * 1000 / 60:.1f} min per 1000 files)")
    print(f" Baseline (0 misses): {intercept:.0f}s ({intercept / 60:.1f} min)")
    print(f" Residual correlation with x^2: {r_resid_x2:.3f}", end="")
    if abs(r_resid_x2) > 0.3:
        print(" (some non-linearity)")
    else:
        print(" (linear model adequate)")
    # Residuals table
    print(f"\n Residuals:\n")
    header = ["Built", "Actual", "Predicted", "Residual"]
    rows = []
    for xi, yi, ypi in sorted(zip(x_vals, y_vals, y_pred)):
        resid = yi - ypi
        rows.append([str(xi), fmt(yi), fmt(ypi), f"{resid:+.0f}s"])
    print_table(header, rows, col_widths=[8, 10, 10, 10])
    # ASCII scatter plot: map x onto [0, width) columns and y onto
    # [0, height) rows (row 0 = top, hence the height-1-... flip).
    print(f"\n Scatter: files built (x) vs build time in minutes (y)\n")
    width = 65
    height = 18
    x_min, x_max = min(x_vals), max(x_vals)
    y_max_v = max(y_vals)
    x_range = x_max - x_min or 1  # avoid div-by-zero when all x equal
    grid = [[' ' for _ in range(width)] for _ in range(height)]
    for xi, yi in zip(x_vals, y_vals):
        col = int((xi - x_min) / x_range * (width - 1))
        row = height - 1 - int(yi / (y_max_v + 1) * (height - 1))
        row = max(0, min(height - 1, row))
        col = max(0, min(width - 1, col))
        grid[row][col] = '*'
    # Overlay the fitted line with '.', never overwriting data points.
    for col in range(width):
        xi = x_min + col / (width - 1) * x_range
        yi = slope * xi + intercept
        row = height - 1 - int(yi / (y_max_v + 1) * (height - 1))
        row = max(0, min(height - 1, row))
        if grid[row][col] == ' ':
            grid[row][col] = '.'
    for row_idx in range(height):
        y_val = (height - 1 - row_idx) / (height - 1) * y_max_v
        label = f"{y_val / 60:5.1f}m"
        print(f" {label} |{''.join(grid[row_idx])}|")
    print(f" +{'-' * width}+")
    print(f" {x_min:<10d}{' ' * (width - 20)}{x_max:>10d}")
    print(f" {'files built (cache misses)':^{width}s}")
    print(f"\n * = data point, . = regression line")
def main():
    """Entry point: fetch CI run data via the gh CLI and print the report.

    Progress messages go to stderr; the report itself goes to stdout, so the
    output can be redirected cleanly.
    """
    parser = argparse.ArgumentParser(description="Mathlib CI timing report")
    parser.add_argument("--runs", type=int, default=20,
                        help="Number of bors runs to analyze (default: 20)")
    parser.add_argument("--master-runs", type=int, default=6,
                        help="Number of master runs to analyze (default: 6)")
    parser.add_argument("--no-regression", action="store_true",
                        help="Skip the regression analysis (avoids downloading build logs)")
    args = parser.parse_args()
    print("=" * 80)
    print("MATHLIB CI TIMING REPORT")
    print("=" * 80)
    # Fetch run lists
    sys.stderr.write(f"Fetching {args.runs} bors runs and {args.master_runs} master runs...\n")
    bors_runs = list_bors_runs(args.runs)
    master_runs = list_master_runs(args.master_runs)
    sys.stderr.write(f" Found {len(bors_runs)} bors runs, {len(master_runs)} master runs\n")
    # Fetch job details (one gh API call per run; failures are skipped)
    sys.stderr.write("Fetching job details...\n")
    bors_data = []
    for r in bors_runs:
        sys.stderr.write(f" Run {r['databaseId']}...")
        jobs = fetch_jobs(r["databaseId"])
        if jobs:
            bors_data.append({"run_id": r["databaseId"], "title": r["displayTitle"], "jobs": jobs})
            sys.stderr.write(f" {len(jobs)} jobs\n")
        else:
            sys.stderr.write(" failed\n")
    master_data = []
    for r in master_runs:
        sys.stderr.write(f" Run {r['databaseId']}...")
        jobs = fetch_jobs(r["databaseId"])
        if jobs:
            master_data.append({"run_id": r["databaseId"], "title": r["displayTitle"], "jobs": jobs})
            sys.stderr.write(f" {len(jobs)} jobs\n")
        else:
            sys.stderr.write(" failed\n")
    # Job dependency graph
    print("""
Job dependency graph:
style_lint ──────────────────────────────────────────────┐
build ──────────┬────────────────────────────────────────┤
├── upload_cache ──┬── post_steps ───────┤
│ │ │
└──────────────────┘ final (post-ci)
style_lint and build start in parallel. Everything else chains off build.
""")
    # Analyze bors runs (pre-merge CI: variable cache hit rates)
    print("=" * 80)
    print(f"BORS (STAGING) RUNS — pre-merge CI ({len(bors_data)} runs)")
    print("=" * 80)
    bors_build_key = analyze_build_job(bors_data, "bors", "Build")
    analyze_post_build(bors_data, "bors")
    analyze_all_jobs(bors_data, "bors")
    # Analyze master runs (push builds: expected full cache hit)
    print("\n" + "=" * 80)
    print(f"MASTER PUSH RUNS — full cache hit ({len(master_data)} runs)")
    print("=" * 80)
    analyze_build_job(master_data, "master", "Build")
    analyze_post_build(master_data, "master")
    analyze_all_jobs(master_data, "master")
    # Regression analysis (only meaningful with bors data and a Build job key)
    if not args.no_regression and bors_build_key and bors_data:
        regression_analysis(bors_data, bors_build_key)
    print()


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ================================================================================ | |
| MATHLIB CI TIMING REPORT | |
| ================================================================================ | |
| Job dependency graph: | |
| style_lint ──────────────────────────────────────────────┐ | |
| build ──────────┬────────────────────────────────────────┤ | |
| ├── upload_cache ──┬── post_steps ───────┤ | |
| │ │ │ | |
| └──────────────────┘ final (post-ci) | |
| style_lint and build start in parallel. Everything else chains off build. | |
| ================================================================================ | |
| BORS (STAGING) RUNS — pre-merge CI (20 runs) | |
| ================================================================================ | |
| Build job (bors) | |
| Job: ci (staging) / Build, 20 runs | |
| Stage Mean Min Max StdDev % | |
| -------------------------------------------------------------------------- | |
| Setup & checkout 0m48s 0m43s 0m55s 0m02s 2.8% | |
| Get cache 0m16s 0m14s 0m21s 0m02s 1.0% | |
| Update .lean files 0m09s 0m09s 0m12s 0m00s 0.5% | |
| Build Mathlib 18m59s 0m09s 37m20s 13m15s 65.9% | |
| Fetch archive/CE cache 0m04s 0m03s 0m08s 0m01s 0.3% | |
| Build Archive 0m15s 0m02s 0m29s 0m09s 0.9% | |
| Build Counterexamples 0m13s 0m02s 0m20s 0m06s 0.8% | |
| Cache staging 2m05s 0m09s 4m54s 1m33s 7.2% | |
| Upload cache 0m04s 0m00s 0m09s 0m02s 0.3% | |
| Post-build checks 0m03s 0m02s 0m04s 0m00s 0.2% | |
| Run tests 1m36s 1m21s 2m04s 0m15s 5.6% | |
| Run linter 4m09s 3m55s 4m45s 0m10s 14.4% | |
| Misc cleanup 0m00s 0m00s 0m01s 0m00s 0.0% | |
| TOTAL 28m49s 7m29s 50m10s 15m05s | |
| Post-Build Step (bors) | |
| Job: ci (staging) / Post-Build Step, 20 runs | |
| Step Mean Min Max | |
| ---------------------------------------------------------------------------------- | |
| Configure Lean 0m10s 0m10s 0m12s | |
| get cache for Mathlib 0m51s 0m47s 1m02s | |
| get cache for Archive and Counterexamples 0m09s 0m07s 0m11s | |
| verify that everything was available in the cache 0m13s 0m13s 0m16s | |
| check declarations in db files 0m27s 0m23s 0m30s | |
| generate our import graph 0m27s 0m24s 0m30s | |
| upload the import graph 0m00s 0m00s 0m02s | |
| build everything 0m04s 0m04s 0m05s | |
| Post Run actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd 0m00s 0m00s 0m01s | |
| Run actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd 0m09s 0m07s 0m20s | |
| Set up job 0m01s 0m01s 0m03s | |
| TOTAL 2m40s 2m24s 3m00s | |
| All jobs (bors) | |
| Job Mean Min Max StdDev | |
| --------------------------------------------------------------------------------- | |
| ci (staging) / Build 28m49s 7m29s 50m10s 15m05s | |
| ci (staging) / Post-Build Step 2m40s 2m24s 3m00s 0m09s | |
| ci (staging) / Lint style 2m03s 1m45s 2m28s 0m08s | |
| ci (staging) / Upload to cache 0m50s 0m39s 1m06s 0m07s | |
| ci (staging) / Post-CI job 0m08s 0m05s 0m11s 0m01s | |
| ================================================================================ | |
| MASTER PUSH RUNS — full cache hit (6 runs) | |
| ================================================================================ | |
| Build job (master) | |
| Job: ci / Build, 6 runs | |
| Stage Mean Min Max StdDev % | |
| -------------------------------------------------------------------------- | |
| Setup & checkout 0m47s 0m41s 0m50s 0m03s 10.9% | |
| Get cache 0m14s 0m14s 0m16s 0m00s 3.4% | |
| Update .lean files 0m09s 0m09s 0m10s 0m00s 2.1% | |
| Build Mathlib 0m05s 0m04s 0m07s 0m01s 1.3% | |
| Fetch archive/CE cache 0m03s 0m03s 0m04s 0m00s 0.8% | |
| Build Archive 0m02s 0m02s 0m03s 0m00s 0.5% | |
| Build Counterexamples 0m02s 0m02s 0m03s 0m00s 0.5% | |
| Cache staging 0m09s 0m07s 0m12s 0m02s 2.2% | |
| Post-build checks 0m03s 0m03s 0m04s 0m00s 0.7% | |
| Run tests 1m27s 1m21s 1m47s 0m09s 20.3% | |
| Run linter 4m03s 3m53s 4m08s 0m05s 56.5% | |
| TOTAL 7m11s 6m49s 7m42s 0m17s | |
| Post-Build Step (master) | |
| Job: ci / Post-Build Step, 6 runs | |
| Step Mean Min Max | |
| ---------------------------------------------------------------------------------- | |
| Configure Lean 0m10s 0m09s 0m12s | |
| get cache for Mathlib 0m48s 0m46s 0m51s | |
| get cache for Archive and Counterexamples 0m08s 0m07s 0m10s | |
| verify that everything was available in the cache 0m13s 0m13s 0m14s | |
| check declarations in db files 0m26s 0m24s 0m28s | |
| generate our import graph 0m26s 0m23s 0m28s | |
| upload the import graph 0m01s 0m00s 0m02s | |
| build everything 0m04s 0m04s 0m05s | |
| Post Run actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd 0m00s 0m00s 0m01s | |
| Run actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd 0m08s 0m08s 0m09s | |
| Set up job 0m02s 0m01s 0m03s | |
| TOTAL 2m33s 2m22s 2m42s | |
| All jobs (master) | |
| Job Mean Min Max StdDev | |
| --------------------------------------------------------------------------------- | |
| ci / Build 7m11s 6m49s 7m42s 0m17s | |
| ci / Post-Build Step 2m33s 2m22s 2m42s 0m07s | |
| ci / Lint style 2m01s 1m55s 2m10s 0m06s | |
| ci / Post-CI job 0m09s 0m07s 0m11s 0m01s | |
| ci / Upload to cache 0m00s 0m00s 0m00s 0m00s | |
| ================================================================================ | |
| BUILD TIME vs CACHE MISSES | |
| ================================================================================ | |
| Downloading build logs for 20 runs... | |
| 20 runs with cache data (sorted by files built): | |
| Run ID Built Replayed Total Miss% Build time sec/file | |
| ------------------------------------------------------------------------ | |
| 22775816611 101 14504 14605 0.7% 0m09s 0.09s | |
| 22783869343 103 14502 14605 0.7% 0m15s 0.15s | |
| 22775422433 108 14496 14604 0.7% 0m22s 0.20s | |
| 22784728370 201 14403 14604 1.4% 2m11s 0.65s | |
| 22785415984 299 14306 14605 2.0% 3m33s 0.71s | |
| 22777674484 572 13821 14393 4.0% 4m56s 0.52s | |
| 22778844388 1695 12135 13830 12.3% 13m47s 0.49s | |
| 22782831772 2269 11147 13416 16.9% 16m55s 0.45s | |
| 22773803465 2377 11369 13746 17.3% 16m06s 0.41s | |
| 22756917363 2532 10469 13001 19.5% 17m19s 0.41s | |
| 22781597975 3337 9049 12386 26.9% 22m36s 0.41s | |
| 22762048381 3610 8559 12169 29.7% 24m37s 0.41s | |
| 22785974094 3905 8002 11907 32.8% 26m23s 0.41s | |
| 22768512555 4503 7428 11931 37.7% 28m35s 0.38s | |
| 22770264518 4599 7222 11821 38.9% 28m46s 0.38s | |
| 22764829579 5715 4921 10636 53.7% 34m14s 0.36s | |
| 22763438045 5766 4819 10585 54.5% 33m11s 0.35s | |
| 22771976575 5849 4708 10557 55.4% 32m20s 0.33s | |
| 22779837246 6345 3735 10080 62.9% 36m08s 0.34s | |
| 22758002567 6868 2489 9357 73.4% 37m20s 0.33s | |
| Linear regression: build_time = 0.3361 * files_built + 118.2 | |
| R = 0.9900, R-squared = 0.9800 | |
| 0.336s per file (336.1s per 1000 files, 5.6 min per 1000 files) | |
| Baseline (0 misses): 118s (2.0 min) | |
| Residual correlation with x^2: -0.256 (linear model adequate) | |
| Residuals: | |
| Built Actual Predicted Residual | |
| -------------------------------------------- | |
| 101 0m09s 2m32s -143s | |
| 103 0m15s 2m32s -138s | |
| 108 0m22s 2m34s -132s | |
| 201 2m11s 3m05s -55s | |
| 299 3m33s 3m38s -6s | |
| 572 4m56s 5m10s -14s | |
| 1695 13m47s 11m27s +139s | |
| 2269 16m55s 14m40s +134s | |
| 2377 16m06s 15m17s +49s | |
| 2532 17m19s 16m09s +70s | |
| 3337 22m36s 20m39s +116s | |
| 3610 24m37s 22m11s +145s | |
| 3905 26m23s 23m50s +152s | |
| 4503 28m35s 27m11s +83s | |
| 4599 28m46s 27m43s +62s | |
| 5715 34m14s 33m59s +15s | |
| 5766 33m11s 34m16s -65s | |
| 5849 32m20s 34m44s -144s | |
| 6345 36m08s 37m30s -83s | |
| 6868 37m20s 40m26s -187s | |
| Scatter: files built (x) vs build time in minutes (y) | |
| 37.3m | ......| | |
| 35.1m | ...* *| | |
| 32.9m | .*.. | | |
| 30.7m | .... * | | |
| 28.5m | ** .... | | |
| 26.4m | * ... | | |
| 24.2m | * .... | | |
| 22.0m | * .... | | |
| 19.8m | ... | | |
| 17.6m | .... | | |
| 15.4m | ***... | | |
| 13.2m | * .... | | |
| 11.0m | ... | | |
| 8.8m | .... | | |
| 6.6m | .... | | |
| 4.4m | *.. | | |
| 2.2m |.*.. | | |
| 0.0m |* | | |
| +-----------------------------------------------------------------+ | |
| 101 6868 | |
| files built (cache misses) | |
| * = data point, . = regression line | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment