|
# SPDX-License-Identifier: MIT |
|
# Debug PySpark executors in PyCharm without hard-coding a port. |
|
# Works by discovering PyCharm's debug server port from parent process cmdlines. |
|
|
|
from __future__ import annotations |
|
|
|
import os |
|
import sys |
|
from functools import lru_cache |
|
|
|
try: |
|
import psutil # type: ignore |
|
except Exception as _e: # pragma: no cover |
|
psutil = None # graceful degradation |
|
|
|
|
|
def _log(msg: str) -> None: |
|
print(f"[executor-debug] {msg}") |
|
|
|
|
|
def debug_here(host: str | None = None, port: int | None = None, suspend: bool = False) -> None:
    """Connect the current process to a PyCharm debug server.

    A missing ``pydevd_pycharm`` package, an unresolved port, and an exception
    raised while attaching are each reported through ``_log`` instead of being
    raised to the caller.

    Args:
        host: Debug server host. When omitted, ``_resolve_host_port`` falls back
            to env ``PYCHARM_DEBUG_HOST`` and then to '127.0.0.1'.
        port: Debug server port. When omitted, ``_resolve_host_port`` falls back
            to env ``PYCHARM_DEBUG_PORT`` and then to a port discovered from a
            parent process's ``--port`` argument.
        suspend: Forwarded to ``settrace``; True requests a break on attach.
    """
    # Imported lazily so the module stays importable without the debugger package.
    try:
        import pydevd_pycharm  # type: ignore
    except Exception as import_error:  # pragma: no cover
        _log(f"pydevd_pycharm not available: {import_error}")
        return

    target_host, target_port = _resolve_host_port(host, port)
    if target_port is None:
        _log("No debug port found. Start PyCharm in Debug (it adds --port), or set PYCHARM_DEBUG_PORT.")
        return

    settrace_kwargs = {
        "host": target_host,
        "port": target_port,
        "stdoutToServer": True,
        "stderrToServer": True,
        "suspend": suspend,
    }
    try:
        pydevd_pycharm.settrace(**settrace_kwargs)
        _log(f"Attached to PyCharm debug server at {target_host}:{target_port} (suspend={suspend}).")
    except Exception as attach_error:
        # Best effort: a failed attach must not take the calling task down.
        _log(f"Failed to attach: {attach_error}")
|
|
|
|
|
def _resolve_host_port(host: str | None, port: int | None) -> tuple[str, int | None]: |
|
# Priority: explicit args → env → discovery → defaults |
|
h = host or os.getenv("PYCHARM_DEBUG_HOST") or "127.0.0.1" |
|
|
|
p_env = os.getenv("PYCHARM_DEBUG_PORT") |
|
if port is not None: |
|
return h, int(port) |
|
if p_env: |
|
try: |
|
return h, int(p_env) |
|
except ValueError: |
|
_log(f"Invalid PYCHARM_DEBUG_PORT: {p_env}") |
|
|
|
return h, _discover_pycharm_port() |
|
|
|
|
|
@lru_cache(maxsize=1)
def _discover_pycharm_port(max_depth: int = 15) -> int | None:
    """Search ancestor processes for a ``--port <N>`` command-line argument.

    The walk starts at the parent of the current process; the current
    process's own command line is not inspected. For each ancestor only the
    first ``--port`` token is considered: if an integer follows it, that
    integer is returned, otherwise the walk moves on to the next ancestor.
    The result (including None) is memoised by ``lru_cache``.

    Args:
        max_depth: Bound on the walk; ancestors at depths ``0..max_depth``
            (inclusive) are inspected.

    Returns:
        The port as an int, or None when psutil is unavailable, the ancestors
        run out, the depth bound is reached, or the walk raises (the error is
        logged).
    """
    if psutil is None:
        _log("psutil is not installed; cannot auto-discover port. Set PYCHARM_DEBUG_PORT.")
        return None

    flag = "--port"
    try:
        node = psutil.Process()
        level = 0
        while node and level <= max_depth:
            # Step up first so every exit below can simply `continue`.
            node = node.parent()
            if not node:
                return None
            level += 1

            try:
                argv = node.cmdline() or []
            except Exception:
                # An unreadable command line is treated as an empty one.
                argv = []

            # Typical: .../pydevd.py --port 31294 --client 127.0.0.1 ...
            if flag not in argv:
                continue

            try:
                value_pos = argv.index(flag) + 1
                if value_pos < len(argv):
                    return int(argv[value_pos])
            except Exception:
                # Unusable value after the flag: keep climbing.
                continue
    except Exception as walk_error:
        _log(f"Error discovering port: {walk_error}")

    return None