Last active
May 10, 2026 20:46
-
-
Save rgant/a26bc387ed3fdd130b6111a297fa2f93 to your computer and use it in GitHub Desktop.
Measure where pylint/astroid spend time on a target codebase
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.14" | |
| # # Mirror the project's [project] dependencies so astroid can resolve imports | |
| # # while profiling — otherwise pylint never recurses into sqlalchemy/pydantic/ | |
| # # fastapi and the workload collapses to ~15% of the real cost. | |
| # dependencies = [ | |
| # "pylint>=4.0.5,<5", | |
| # "alembic>=1.18.4,<2", | |
| # "fastapi>=0.136.1,<0.137", | |
| # "httpxyz>=0.31.2,<0.32", | |
| # "jinja2>=3.1.6,<4", | |
| # "pillow>=12.2.0,<13", | |
| # "pydantic>=2.13.4,<3", | |
| # "pydantic-settings>=2.14.1,<3", | |
| # "python-multipart>=0.0.27,<0.0.28", | |
| # "sqlalchemy>=2.0.49,<3", | |
| # "starlette>=0.52.1,<0.53", | |
| # "uvicorn[standard]>=0.46.0,<0.47", | |
| # ] | |
| # /// | |
| """profile_pylint.py — measure where pylint/astroid spend time on a target codebase. | |
| Runs pylint under cProfile, forces ``--jobs=1`` (cProfile can't follow child processes), and prints | |
| two views: | |
| 1. Standard pstats top-N by cumulative time. | |
| 2. A pinned-hotspot table for functions called out in upstream pylint/astroid perf threads — | |
| ``ClassDef.ancestors`` (astroid#1115), ``ast_from_module_name`` (astroid#1145), and a few | |
| others. Each row shows total + primitive call counts (matching pstats' ``nc/cc`` convention) | |
| and a percent-of-wall column so the headline metric is visible without extra arithmetic. | |
| Wall time reported here is a conservative upper bound vs ``--jobs=0`` — cProfile only sees the | |
| parent process. The *relative* breakdown across functions is what's representative; absolute wall | |
| time on real configs will be lower. | |
| Usage:: | |
| uv run scripts/profile_pylint.py path/to/file_or_dir | |
| uv run scripts/profile_pylint.py . --top 25 | |
| uv run scripts/profile_pylint.py . --save profile.prof # also dump raw .prof | |
| uv run scripts/profile_pylint.py . -- --rcfile=pyproject.toml --disable=duplicate-code | |
| ^^ pass args to pylint after `--` | |
| Output is plain text on stdout; copy the pinned-hotspot table verbatim into upstream issue comments | |
| and the maintainers can compare directly across reporters' projects. | |
| """ | |
| import argparse | |
| import cProfile | |
| import importlib.metadata | |
| import platform | |
| import pstats | |
| import sys | |
| import time | |
# Hotspots named in upstream pylint/astroid performance threads. The report prints one row per
# entry (total calls, primitive calls, cumulative time, share of wall time) so the numbers can be
# quoted on those threads without digging through raw pstats output.
_PINNED: list[tuple[str, str, str]] = [
    # Each entry: (function name, suffix the source filename must end with, thread or label).
    ("ancestors", "scoped_nodes.py", "astroid#1115"),
    ("infer", "node_ng.py", "general inference"),
    ("ast_from_module_name", "manager.py", "astroid#1145"),
    ("_visit", "transforms.py", "brain transform visit"),
    ("file_build", "builder.py", "AST build cost"),
]

# Horizontal rules used by the plain-text report: a full-width bar around the header line and a
# shorter rule appended to each section title.
_BAR = 78 * "="
_RULE = 30 * "-"
def main() -> int:
    """Profile one pylint run and print the report to stdout.

    The report consists of a header line, the standard pstats listing sorted by cumulative time,
    a table with one row per ``_PINNED`` entry, and a compact one-line summary. When ``--save``
    was given, the raw profile is also written to that path.

    Returns:
        The exit code reported by ``_run_pylint``, intended to be handed to ``sys.exit``.
    """
    opts = _parse_args()
    profiler, wall, exit_code = _run_pylint(opts.target, opts.pylint_args)
    stats = pstats.Stats(profiler).sort_stats("cumulative")

    # Header framed by two bars, with a blank line before and after.
    print("", _BAR, _header(opts.target, wall, exit_code), _BAR, "", sep="\n")

    print(f"--- top {opts.top} by cumulative time {_RULE}")
    stats.print_stats(opts.top)

    print(f"--- pinned hotspots (upstream perf threads) {_RULE}")
    column_titles = (
        "function (file)".ljust(42),
        "thread".ljust(22),
        "total".rjust(10),
        "prim".rjust(10),
        "cumtime".rjust(10),
        "%wall".rjust(7),
    )
    print(" ".join(column_titles))

    summary: list[str] = []
    for func, suffix, label in _PINNED:
        total_calls, prim_calls, cumtime = _accumulate(stats, func, suffix)
        # Guard against a zero wall time so the percentage never divides by zero.
        if wall > 0:
            pct = cumtime / wall * 100
        else:
            pct = 0.0
        where = f"{func} ({suffix})"
        print(
            f"{where:<42} {label:<22}"
            f" {total_calls:>10} {prim_calls:>10} {cumtime:>9.3f}s {pct:>6.1f}%",
        )
        summary.append(f"{func}={cumtime:.2f}s/{_human(total_calls)}")

    print()
    print("Percent column is share of wall time for that function's call hierarchy;")
    print("entries overlap because callers' cumtime includes callees', so they sum to >100%.")
    print()
    print(f"--- comment-ready one-liner {_RULE}")
    print(" " + " ".join(summary))

    if not opts.save:
        return exit_code

    profiler.dump_stats(opts.save)
    print()
    print(f"raw profile written to {opts.save}")
    print(f" follow-up: python -m pstats {opts.save}")
    return exit_code
def _parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    The module docstring doubles as the ``--help`` description and is shown verbatim
    (raw formatter, no re-wrapping).

    Returns:
        Namespace with ``target`` (str), ``top`` (int, default 15), ``save`` (str or None) and
        ``pylint_args`` (list of str forwarded to pylint).
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    cli.add_argument("target", help="path to lint (file or directory)")
    cli.add_argument(
        "--top",
        metavar="N",
        default=15,
        type=int,
        help="rows of standard pstats output to print (default: 15)",
    )
    cli.add_argument(
        "--save",
        metavar="PATH",
        help="also dump a raw .prof file for `python -m pstats PATH` follow-up",
    )
    cli.add_argument(
        "pylint_args",
        nargs="*",
        help="extra arguments after `--` are forwarded to pylint (e.g. `-- --rcfile=...`)",
    )
    return cli.parse_args()
def _run_pylint(target: str, extra_args: list[str]) -> tuple[cProfile.Profile, float, int]:
    """Run pylint under cProfile and return (profiler, wall_seconds, pylint_exit_code).

    Forces ``--jobs=1`` because cProfile only instruments the parent process; profiling a
    multi-process pylint run captures only the orchestrator's idle time, which is useless.

    Args:
        target: Path handed to pylint as the thing to lint.
        extra_args: Additional command-line arguments forwarded to pylint.

    Returns:
        A tuple of the (disabled) profiler, the elapsed wall-clock seconds measured around the
        run, and pylint's exit status. On a normal run the status is pylint's message bitmask
        (``linter.msg_status``); if pylint raised ``SystemExit`` it is that exit code instead.
    """
    import pylint.lint  # noqa: PLC0415  # deferred so `--help` doesn't pay the import cost

    # ``--jobs=1`` goes after the forwarded arguments so a forwarded ``--jobs=N`` cannot
    # override it; the target stays last.
    argv = [*extra_args, "--jobs=1", target]
    profiler = cProfile.Profile()
    exit_code = 0
    start = time.perf_counter()
    profiler.enable()
    try:
        run = pylint.lint.Run(argv, exit=False)
    except SystemExit as exc:
        # pylint can still call sys.exit with exit=False (e.g. usage errors); capture the code
        # instead of letting it terminate the profiler script.
        code = exc.code
        if code is None:
            exit_code = 0
        elif isinstance(code, int):
            exit_code = code
        else:
            # A non-integer payload is what the interpreter would report as exit status 1.
            exit_code = 1
    else:
        # With exit=False the status is not raised; it has to be read off the linter.
        exit_code = int(run.linter.msg_status)
    finally:
        profiler.disable()
    return profiler, time.perf_counter() - start, exit_code
def _header(target: str, wall: float, exit_code: int) -> str:
    """Return the single-line report header.

    The line records the installed pylint and astroid versions, the interpreter version, the
    platform and machine, the GIL status, the linted target, the wall time and the exit code.
    """
    versions = [f"{name}={_safe_version(name)}" for name in ("pylint", "astroid")]
    pkg_versions = " ".join(versions)
    major, minor, micro = sys.version_info[:3]
    environment = f"{sys.platform}/{platform.machine()}, {_gil_status()}"
    return (
        f"profile_pylint {pkg_versions} py{major}.{minor}.{micro} ({environment})"
        f" target={target!r} wall={wall:.2f}s exit={exit_code}"
    )
def _safe_version(pkg: str) -> str:
    """Return the installed version of *pkg*, or ``"?"`` when no such distribution is found."""
    try:
        found = importlib.metadata.version(pkg)
    except importlib.metadata.PackageNotFoundError:
        found = "?"
    return found
def _gil_status() -> str:
    """Return ``GIL`` or ``no-GIL`` so reports from free-threaded builds can be told apart.

    ``sys._is_gil_enabled`` was added in Python 3.13. Interpreters without it always run with
    the GIL, so a missing helper is reported as ``GIL``.
    """
    probe = getattr(sys, "_is_gil_enabled", None)
    gil_active = True if probe is None else probe()
    return "GIL" if gil_active else "no-GIL"
def _human(n: int) -> str:
    """Format a call count compactly: 332387 → '332.4k', 1234567 → '1.2M', 14 → '14'.

    Counts below 1000 are printed as-is; larger counts get one decimal place and a ``k`` or
    ``M`` suffix. Counts just under one million that would round up to ``1000.0k`` are shown
    as ``1.0M`` instead.
    """
    thousands = f"{n / 1_000:.1f}"
    # Compare the rendered text rather than the number so the roll-over follows exactly the
    # same rounding the formatter applies.
    if n >= 1_000_000 or thousands == "1000.0":
        return f"{n / 1_000_000:.1f}M"
    if n >= 1_000:
        return f"{thousands}k"
    return str(n)
def _accumulate(stats: pstats.Stats, func: str, file_suffix: str) -> tuple[int, int, float]:
    """Add up call counts and cumulative time for every profile entry matching a function.

    An entry matches when its function name equals *func* and its filename ends with
    *file_suffix*; several entries may match and are summed together.

    pstats stores each entry as ``(cc, nc, tt, ct, callers)``: ``cc`` is the primitive call
    count and ``nc`` the total count including recursive calls. pstats' own printer shows them
    as ``nc/cc`` when they differ; both are returned so the recursion signal is preserved.

    Returns:
        ``(total_calls, primitive_calls, cumulative_seconds)``; all zero when nothing matches.
    """
    total_calls, primitive_calls, cumulative = 0, 0, 0.0
    # pstats.Stats.stats is the de-facto inspection API for cProfile output.
    for (path, _line, funcname), (primitive, calls, _own, cum, _who) in stats.stats.items():  # pyright: ignore[reportAttributeAccessIssue]
        if funcname != func or not path.endswith(file_suffix):
            continue
        primitive_calls += primitive
        total_calls += calls
        cumulative += cum
    return total_calls, primitive_calls, cumulative
# Script entry point: run the profiler and use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
rgant
commented
May 7, 2026
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment