Skip to content

Instantly share code, notes, and snippets.

@rgant
Last active May 10, 2026 20:46
Show Gist options
  • Select an option

  • Save rgant/a26bc387ed3fdd130b6111a297fa2f93 to your computer and use it in GitHub Desktop.

Select an option

Save rgant/a26bc387ed3fdd130b6111a297fa2f93 to your computer and use it in GitHub Desktop.
Measure where pylint/astroid spend time on a target codebase
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.14"
# # Mirror the project's [project] dependencies so astroid can resolve imports
# # while profiling — otherwise pylint never recurses into sqlalchemy/pydantic/
# # fastapi and the workload collapses to ~15% of the real cost.
# dependencies = [
# "pylint>=4.0.5,<5",
# "alembic>=1.18.4,<2",
# "fastapi>=0.136.1,<0.137",
# "httpxyz>=0.31.2,<0.32",
# "jinja2>=3.1.6,<4",
# "pillow>=12.2.0,<13",
# "pydantic>=2.13.4,<3",
# "pydantic-settings>=2.14.1,<3",
# "python-multipart>=0.0.27,<0.0.28",
# "sqlalchemy>=2.0.49,<3",
# "starlette>=0.52.1,<0.53",
# "uvicorn[standard]>=0.46.0,<0.47",
# ]
# ///
"""profile_pylint.py — measure where pylint/astroid spend time on a target codebase.
Runs pylint under cProfile, forces ``--jobs=1`` (cProfile can't follow child processes), and prints
two views:
1. Standard pstats top-N by cumulative time.
2. A pinned-hotspot table for functions called out in upstream pylint/astroid perf threads —
``ClassDef.ancestors`` (astroid#1115), ``ast_from_module_name`` (astroid#1145), and a few
others. Each row shows total + primitive call counts (matching pstats' ``nc/cc`` convention)
and a percent-of-wall column so the headline metric is visible without extra arithmetic.
Wall time reported here is a conservative upper bound vs ``--jobs=0`` — cProfile only sees the
parent process. The *relative* breakdown across functions is what's representative; absolute wall
time on real configs will be lower.
Usage::
uv run scripts/profile_pylint.py path/to/file_or_dir
uv run scripts/profile_pylint.py . --top 25
uv run scripts/profile_pylint.py . --save profile.prof # also dump raw .prof
uv run scripts/profile_pylint.py . -- --rcfile=pyproject.toml --disable=duplicate-code
^^ pass args to pylint after `--`
Output is plain text on stdout; copy the pinned-hotspot table verbatim into upstream issue comments
and the maintainers can compare directly across reporters' projects.
"""
import argparse
import cProfile
import importlib.metadata
import platform
import pstats
import sys
import time
# Hotspots named in upstream pylint/astroid performance threads. Every pinned row is printed as
# (total_calls, primitive_calls, cumtime, percent of wall), so a comment on one of those threads
# can quote a directly comparable data point without digging through raw pstats output.
_PINNED: list[tuple[str, str, str]] = [
    # (func_name, file_basename_suffix, upstream_thread_or_label)
    ("ancestors", "scoped_nodes.py", "astroid#1115"),
    ("infer", "node_ng.py", "general inference"),
    ("ast_from_module_name", "manager.py", "astroid#1145"),
    ("_visit", "transforms.py", "brain transform visit"),
    ("file_build", "builder.py", "AST build cost"),
]

# Report separators: a 78-column banner line and a 30-column rule appended to section titles.
_BAR = 78 * "="
_RULE = 30 * "-"
def main() -> int:
    """Profile one pylint run, print the report to stdout, and return the exit code.

    The report has four parts: a banner with the environment header, the standard pstats
    listing sorted by cumulative time (limited to ``--top`` rows), the pinned-hotspot table
    built from ``_PINNED``, and a compact one-line summary of the same hotspots. When
    ``--save`` is given the raw profile is also dumped to that path.

    Returns:
        The exit code produced by ``_run_pylint``.
    """
    args = _parse_args()
    profiler, wall, exit_code = _run_pylint(args.target, args.pylint_args)
    stats = pstats.Stats(profiler).sort_stats("cumulative")

    # Banner: blank line, rule, header, rule, blank line.
    for banner_line in ("", _BAR, _header(args.target, wall, exit_code), _BAR, ""):
        print(banner_line)

    # Section 1: plain pstats output, already sorted by cumulative time.
    print(f"--- top {args.top} by cumulative time {_RULE}")
    stats.print_stats(args.top)

    # Section 2: one row per pinned function, plus a fragment for the one-liner.
    print(f"--- pinned hotspots (upstream perf threads) {_RULE}")
    print(f"{'function (file)':<42} {'thread':<22} {'total':>10} {'prim':>10} {'cumtime':>10} {'%wall':>7}")
    summary: list[str] = []
    for func, suffix, label in _PINNED:
        total_calls, prim_calls, cumtime = _accumulate(stats, func, suffix)
        # Guard the division: a zero wall time would otherwise raise ZeroDivisionError.
        pct = cumtime / wall * 100 if wall > 0 else 0.0
        row = (
            f"{f'{func} ({suffix})':<42} {label:<22}"
            f" {total_calls:>10} {prim_calls:>10} {cumtime:>9.3f}s {pct:>6.1f}%"
        )
        print(row)
        summary.append(f"{func}={cumtime:.2f}s/{_human(total_calls)}")

    print()
    print("Percent column is share of wall time for that function's call hierarchy;")
    print("entries overlap because callers' cumtime includes callees', so they sum to >100%.")
    print()

    # Section 3: the same numbers squeezed onto a single line.
    print(f"--- comment-ready one-liner {_RULE}")
    print(" " + " ".join(summary))

    if not args.save:
        return exit_code

    # Optional raw dump for later inspection with the pstats browser.
    profiler.dump_stats(args.save)
    print()
    print(f"raw profile written to {args.save}")
    print(f" follow-up: python -m pstats {args.save}")
    return exit_code
def _parse_args() -> argparse.Namespace:
    """Parse the command line (``sys.argv``) into a namespace.

    The namespace carries ``target`` (path to lint), ``top`` (row limit for the pstats
    listing, default 15), ``save`` (optional path for a raw profile dump, ``None`` when
    absent) and ``pylint_args`` (list of extra arguments forwarded to pylint).

    The module docstring doubles as the ``--help`` description; the raw formatter keeps its
    line breaks intact.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    # Results of add_argument are assigned to `_` to mark them as deliberately unused.
    _ = cli.add_argument("target", help="path to lint (file or directory)")
    _ = cli.add_argument(
        "--top",
        metavar="N",
        default=15,
        type=int,
        help="rows of standard pstats output to print (default: 15)",
    )
    _ = cli.add_argument(
        "--save",
        metavar="PATH",
        help="also dump a raw .prof file for `python -m pstats PATH` follow-up",
    )
    # Declared last so it collects every positional that follows `target`.
    _ = cli.add_argument(
        "pylint_args",
        nargs="*",
        help="extra arguments after `--` are forwarded to pylint (e.g. `-- --rcfile=...`)",
    )
    namespace = cli.parse_args()
    return namespace
def _run_pylint(target: str, extra_args: list[str]) -> tuple[cProfile.Profile, float, int]:
    """Run pylint under cProfile and return (profiler, wall_seconds, pylint_exit_code).

    Forces ``--jobs=1`` because cProfile only instruments the parent process; profiling a
    multi-process pylint run captures only the orchestrator's idle time, which is useless.

    Args:
        target: File or directory handed to pylint as the thing to lint.
        extra_args: Additional pylint command-line arguments, forwarded unchanged.

    Returns:
        A tuple of the (disabled) profiler holding the collected data, the elapsed wall-clock
        seconds measured around the pylint run, and pylint's exit status. On a normal return
        the status is the linter's ``msg_status`` bit mask; if pylint raised ``SystemExit``
        it is derived from the exception's code.
    """
    import pylint.lint  # noqa: PLC0415  # deferred so `--help` doesn't pay the import cost

    # `--jobs=1` goes after the forwarded arguments so that a `--jobs=N` passed through
    # `extra_args` cannot override it (the later occurrence of an option wins).
    argv = [*extra_args, "--jobs=1", target]
    profiler = cProfile.Profile()
    exit_code = 0
    start = time.perf_counter()
    profiler.enable()
    try:
        run = pylint.lint.Run(argv, exit=False)
    except SystemExit as exc:
        # Some pylint versions call sys.exit even with exit=False; capture the code instead.
        # SystemExit.code may be None (success), an int, or an arbitrary object such as an
        # error message; the interpreter maps the latter to status 1, so do the same here.
        code = exc.code
        if code is None:
            exit_code = 0
        elif isinstance(code, int):
            exit_code = code
        else:
            exit_code = 1
    else:
        # With exit=False pylint returns normally; the status it would have exited with is
        # kept on the linter, so read it rather than reporting an unconditional 0.
        exit_code = int(run.linter.msg_status)
    finally:
        profiler.disable()
    wall = time.perf_counter() - start
    return profiler, wall, exit_code
def _header(target: str, wall: float, exit_code: int) -> str:
    """Build the one-line report header describing the run and its environment.

    The line names the installed pylint/astroid versions, the interpreter version, the
    platform/machine with the GIL status, the lint target, the wall time in seconds (two
    decimals) and the exit code.
    """
    versions = [f"{pkg}={_safe_version(pkg)}" for pkg in ("pylint", "astroid")]
    pkg_versions = " ".join(versions)
    info = sys.version_info
    py = f"py{info.major}.{info.minor}.{info.micro}"
    plat = f"{sys.platform}/{platform.machine()}, {_gil_status()}"
    environment = f"profile_pylint {pkg_versions} {py} ({plat})"
    outcome = f" target={target!r} wall={wall:.2f}s exit={exit_code}"
    return environment + outcome
def _safe_version(pkg: str) -> str:
    """Return the installed version of distribution ``pkg``, or ``"?"`` if it is not found."""
    try:
        found = importlib.metadata.version(pkg)
    except importlib.metadata.PackageNotFoundError:
        # Missing metadata must not abort the report; show a placeholder instead.
        found = "?"
    return found
def _gil_status() -> str:
    """Return ``GIL`` or ``no-GIL`` to disambiguate free-threaded builds.

    ``sys._is_gil_enabled`` was added in Python 3.13; an interpreter without it always runs
    with the GIL, so a missing helper is reported as ``GIL``.
    """
    probe = getattr(sys, "_is_gil_enabled", None)
    # Only a present helper that reports "disabled" means a free-threaded run.
    if probe is not None and not probe():
        return "no-GIL"
    return "GIL"
def _human(n: int) -> str:
    """Format a call count compactly: 332387 → '332.4k', 1234567 → '1.2M', 14 → '14'.

    Counts below 1000 are printed as-is; larger counts are scaled to thousands (``k``) or
    millions (``M``) with one decimal place.

    Args:
        n: The call count to format.

    Returns:
        The compact string form of ``n``.
    """
    if n >= 1_000_000:
        return f"{n / 1_000_000:.1f}M"
    if n >= 1_000:
        thousands = f"{n / 1_000:.1f}"
        # Values just under a million round up to "1000.0"; promote those to the next unit
        # so the output reads "1.0M" rather than "1000.0k".
        if thousands != "1000.0":
            return f"{thousands}k"
        return f"{n / 1_000_000:.1f}M"
    return str(n)
def _accumulate(stats: pstats.Stats, func: str, file_suffix: str) -> tuple[int, int, float]:
    """Sum (total_calls, primitive_calls, cumtime) for entries matching name + file suffix.

    pstats values are ``(cc, nc, tt, ct, callers)`` where ``cc`` is the primitive count and ``nc``
    is the total count (recursion-inflated). pstats' default printer displays them as ``nc/cc``
    when they differ; we surface both so issue comments preserve the recursion signal.

    A filename matches when it equals ``file_suffix`` or ends with it directly after a path
    separator, so ``builder.py`` does not also select a file named ``rebuilder.py``.

    Args:
        stats: Profile statistics whose ``stats`` mapping is scanned.
        func: Exact function name to match.
        file_suffix: Trailing path component(s) of the defining file, e.g. ``manager.py``.

    Returns:
        ``(total_calls, primitive_calls, cumtime)`` summed over every matching entry, or
        ``(0, 0, 0.0)`` when nothing matches. Each entry's cumulative time includes its
        callees, so if several matching entries call one another the summed time overlaps.
    """
    # Anchor the suffix at a path boundary; accept both POSIX and Windows separators.
    anchored = ("/" + file_suffix, "\\" + file_suffix)
    total = 0
    prim = 0
    cumtime = 0.0
    # pstats.Stats.stats is the de-facto inspection API for cProfile output.
    for (filename, _lineno, name), (cc, nc, _tt, ct, _callers) in stats.stats.items():  # pyright: ignore[reportAttributeAccessIssue]
        if name != func:
            continue
        if filename == file_suffix or filename.endswith(anchored):
            prim += cc
            total += nc
            cumtime += ct
    return total, prim, cumtime
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@rgant

rgant commented May 7, 2026

Copy link
Copy Markdown
Author
Patroclus:~/Programming/cat-watcher (main)$ pixi run -- python /Users/rgant/Programming/cat-watcher/scripts/profile_pylint.py src/cat_watcher/web/routes.py --top 8
Using venv: /Users/rgant/Programming/cat-watcher/.pixi/envs/default

--------------------------------------------------------------------
Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)


==============================================================================
profile_pylint  pylint=4.0.5 astroid=4.0.4  py3.14.4 (darwin/arm64, GIL)  target='src/cat_watcher/web/routes.py'  wall=5.59s  exit=0
==============================================================================

--- top 8 by cumulative time ------------------------------
         17081821 function calls (13913319 primitive calls) in 5.585 seconds

   Ordered by: cumulative time
   List reduced from 9103 to 8 due to restriction <8>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    5.589    5.589 /Users/rgant/Programming/cat-watcher/.pixi/envs/default/lib/python3.14/site-packages/pylint/lint/run.py:143(__init__)
        1    0.000    0.000    5.446    5.446 /Users/rgant/Programming/cat-watcher/.pixi/envs/default/lib/python3.14/site-packages/pylint/lint/pylinter.py:672(check)
198920/6889    0.046    0.000    3.778    0.001 {built-in method builtins.next}
327110/6541    0.187    0.000    3.766    0.001 /Users/rgant/Programming/cat-watcher/.pixi/envs/default/lib/python3.14/site-packages/astroid/nodes/node_ng.py:121(infer)
80440/1610    0.048    0.000    3.759    0.002 /Users/rgant/Programming/cat-watcher/.pixi/envs/default/lib/python3.14/site-packages/astroid/decorators.py:32(wrapped)
88725/2113    0.024    0.000    3.758    0.002 /Users/rgant/Programming/cat-watcher/.pixi/envs/default/lib/python3.14/site-packages/astroid/decorators.py:78(inner)
   125/24    0.000    0.000    3.679    0.153 /Users/rgant/Programming/cat-watcher/.pixi/envs/default/lib/python3.14/site-packages/astroid/manager.py:131(ast_from_file)
73667/5122    0.070    0.000    3.626    0.001 /Users/rgant/Programming/cat-watcher/.pixi/envs/default/lib/python3.14/site-packages/astroid/bases.py:153(_infer_stmts)


--- pinned hotspots (upstream perf threads) ------------------------------
function (file)                            thread                      total       prim    cumtime   %wall
ancestors (scoped_nodes.py)                astroid#1115               332387      57689     2.736s   49.0%
infer (node_ng.py)                         general inference          327110       6541     3.766s   67.4%
ast_from_module_name (manager.py)          astroid#1145                 6844       1022     3.558s   63.7%
_visit (transforms.py)                     brain transform visit      296630        220     2.765s   49.5%
file_build (builder.py)                    AST build cost                113         14     3.567s   63.8%

Percent column is share of wall time for that function's call hierarchy;
entries overlap because callers' cumtime includes callees', so they sum to >100%.

--- comment-ready one-liner ------------------------------
  ancestors=2.74s/332.4k  infer=3.77s/327.1k  ast_from_module_name=3.56s/6.8k  _visit=2.77s/296.6k  file_build=3.57s/113

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment