Skip to content

Instantly share code, notes, and snippets.

@rjurney
Created October 2, 2025 17:51
Show Gist options
  • Save rjurney/e731a9f2895b043847449af6de84eac8 to your computer and use it in GitHub Desktop.
Save rjurney/e731a9f2895b043847449af6de84eac8 to your computer and use it in GitHub Desktop.
Pynvml / Rich GPU Monitor with min/max
#!/usr/bin/env python3
"""Standalone GPU monitoring utility using pynvml and rich.

This script monitors NVIDIA GPU utilization and memory usage in real-time.

Dependencies:
    pip install pynvml rich click

Usage:
    ./monitor.py --interval 1.0 --duration 60
    ./monitor.py -i 0.5  # Monitor every 0.5 seconds indefinitely
"""
import time
from typing import Any, Dict, List, Optional
import click
import pynvml # type: ignore
from rich.console import Console
from rich.live import Live
from rich.table import Table
class GPUMonitor:
    """Monitor GPU utilization and memory usage.

    The constructor opens an NVML session and caches one device handle per
    GPU; :meth:`shutdown` closes the session. Instances can also be used as
    context managers, in which case the session is closed on exit.

    Attributes
    ----------
    device_count : int
        Number of devices reported by ``pynvml.nvmlDeviceGetCount``.
    handles : list
        One NVML device handle per device index.
    current_stats : List[Dict[str, float]]
        Result of the most recent :meth:`get_stats` call.
    min_memory, max_memory : List[float]
        Per-device minimum / maximum of ``memory_used_mb`` seen so far.
    """

    # Class-level default so that shutdown() is safe on an instance whose
    # __init__ never completed (or never ran).
    _initialized: bool = False

    def __init__(self) -> None:
        """Initialize GPU monitoring with pynvml."""
        pynvml.nvmlInit()
        self._initialized = True
        try:
            self.device_count = pynvml.nvmlDeviceGetCount()
            self.handles = [
                pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(self.device_count)
            ]
        except Exception:
            # NVML is already initialized at this point; release it before
            # propagating so a failed constructor does not leak the session.
            self.shutdown()
            raise

        # Initialize stats tracking
        self.current_stats: List[Dict[str, float]] = []
        self.min_memory: List[float] = [float("inf")] * self.device_count
        self.max_memory: List[float] = [0.0] * self.device_count

    def __enter__(self) -> "GPUMonitor":
        """Return the monitor itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type: Any, exc: Any, traceback: Any) -> None:
        """Shut NVML down when leaving the ``with`` block."""
        self.shutdown()

    def _build_stat(
        self, index: int, used_bytes: float, total_bytes: float, gpu_utilization: float
    ) -> Dict[str, float]:
        """Fold one raw sample into the min/max trackers and build its stats dict.

        Parameters
        ----------
        index : int
            Device index; selects the slot in ``min_memory`` / ``max_memory``.
        used_bytes, total_bytes : float
            Raw memory figures; divided by ``1024**2`` for the ``*_mb`` fields.
        gpu_utilization : float
            Value stored unchanged under ``"gpu_utilization"``.

        Returns
        -------
        Dict[str, float]
            Stats dictionary for this device.
        """
        memory_used_mb = used_bytes / (1024**2)
        memory_total_mb = total_bytes / (1024**2)
        # Guard against a device reporting zero total memory.
        memory_percent = (used_bytes / total_bytes) * 100 if total_bytes else 0.0

        # Update min/max tracking
        self.min_memory[index] = min(self.min_memory[index], memory_used_mb)
        self.max_memory[index] = max(self.max_memory[index], memory_used_mb)

        return {
            "gpu_id": index,
            "gpu_utilization": gpu_utilization,
            "memory_used_mb": memory_used_mb,
            "memory_total_mb": memory_total_mb,
            "memory_percent": memory_percent,
            "min_memory_mb": self.min_memory[index],
            "max_memory_mb": self.max_memory[index],
        }

    def get_stats(self) -> List[Dict[str, float]]:
        """Get current GPU stats for all devices.

        Queries memory info and utilization rates for every cached handle,
        updates the per-device min/max memory trackers, and stores the result
        in ``current_stats``.

        Returns
        -------
        List[Dict[str, float]]
            List of dictionaries containing stats for each GPU
        """
        stats = []
        for index, handle in enumerate(self.handles):
            memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
            stats.append(
                self._build_stat(
                    index, memory_info.used, memory_info.total, utilization.gpu
                )
            )
        self.current_stats = stats
        return stats

    def shutdown(self) -> None:
        """Shutdown pynvml.

        Safe to call more than once: only the first call after a successful
        ``nvmlInit`` reaches ``pynvml.nvmlShutdown``.
        """
        if not self._initialized:
            return
        # Clear the flag first so a failing nvmlShutdown is not retried.
        self._initialized = False
        pynvml.nvmlShutdown()
def create_table(stats: List[Dict[str, float]]) -> Table:
    """Render GPU statistics as a Rich table.

    Parameters
    ----------
    stats : List[Dict[str, float]]
        One dictionary per GPU; each must provide ``gpu_id`` plus the six
        numeric keys listed in ``numeric_keys`` below.

    Returns
    -------
    Table
        Table titled "GPU Monitor" with one row per entry in ``stats``.
    """
    # (header, add_column keyword arguments), in display order.
    column_specs = (
        ("GPU ID", {"style": "cyan", "justify": "center"}),
        ("GPU Util %", {"justify": "right"}),
        ("Mem Used (MB)", {"justify": "right"}),
        ("Mem Total (MB)", {"justify": "right"}),
        ("Mem %", {"justify": "right"}),
        ("Min Mem (MB)", {"justify": "right", "style": "green"}),
        ("Max Mem (MB)", {"justify": "right", "style": "red"}),
    )
    # Keys rendered with one decimal place, matching the columns after "GPU ID".
    numeric_keys = (
        "gpu_utilization",
        "memory_used_mb",
        "memory_total_mb",
        "memory_percent",
        "min_memory_mb",
        "max_memory_mb",
    )

    grid = Table(title="GPU Monitor", show_header=True, header_style="bold magenta")
    for heading, options in column_specs:
        grid.add_column(heading, **options)

    for entry in stats:
        cells = [str(entry["gpu_id"])]
        cells.extend(f"{entry[key]:.1f}" for key in numeric_keys)
        grid.add_row(*cells)

    return grid
def monitor_gpus(interval: float = 1.0, duration: Optional[float] = None) -> None:
    """Monitor GPU utilization and memory continuously.

    Polls a ``GPUMonitor`` every ``interval`` seconds and shows the result in
    a Rich ``Live`` display until ``duration`` elapses or the user presses
    Ctrl+C. The monitor is always shut down on exit.

    Parameters
    ----------
    interval : float, optional
        Time interval between updates in seconds, by default 1.0
    duration : Optional[float], optional
        Total duration to monitor in seconds, by default None (run indefinitely)

    Raises
    ------
    ValueError
        If ``interval`` is negative.
    """
    # Fail before touching NVML or the terminal; time.sleep() would raise the
    # same exception type later, after the display had already started.
    if interval < 0:
        raise ValueError("interval must be non-negative")

    console = Console()
    monitor = GPUMonitor()
    # Use the monotonic clock so wall-clock adjustments cannot shorten or
    # extend the run.
    deadline = None if duration is None else time.monotonic() + duration

    try:
        with Live(console=console, refresh_per_second=4) as live:
            while deadline is None or time.monotonic() < deadline:
                live.update(create_table(monitor.get_stats()))

                pause = interval
                if deadline is not None:
                    # Do not sleep past the deadline on the final iteration.
                    pause = min(interval, max(0.0, deadline - time.monotonic()))
                time.sleep(pause)
    except KeyboardInterrupt:
        console.print("\n[yellow]Monitoring stopped by user[/yellow]")
    finally:
        monitor.shutdown()
@click.command(context_settings={"show_default": True})
@click.option(
    "-i",
    "--interval",
    # Strictly positive: zero would poll in a tight loop and a negative value
    # would make time.sleep() raise.
    type=click.FloatRange(min=0.0, min_open=True),
    default=1.0,
    help="Time interval between updates in seconds",
)
@click.option(
    "-d",
    "--duration",
    # A negative duration is meaningless; reject it at parse time.
    type=click.FloatRange(min=0.0),
    default=None,
    help="Total duration to monitor in seconds (None = indefinite)",
)
def main(interval: float, duration: Optional[float]) -> None:
    """Monitor NVIDIA GPU utilization and memory usage in real-time.

    Displays a live-updating table showing GPU utilization percentage, memory usage,
    and tracks minimum/maximum memory usage across the monitoring session.

    Press Ctrl+C to stop monitoring.
    """
    # NOTE: click renders the docstring above as the --help text, so it is
    # kept free of parameter documentation.
    try:
        monitor_gpus(interval=interval, duration=duration)
    except pynvml.NVMLError as exc:
        # Report NVML failures (e.g. driver or library not found) as a normal
        # CLI error instead of a traceback.
        raise click.ClickException(f"NVML error: {exc}") from exc
# Run the click command only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()
┏━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃ GPU ID ┃ GPU Util % ┃ Mem Used (MB) ┃ Mem Total (MB) ┃ Mem % ┃ Min Mem (MB) ┃ Max Mem (MB) ┃
┡━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
│ 0 │ 100.0 │ 9856.0 │ 12288.0 │ 80.2 │ 9816.0 │ 9856.0 │
│ 1 │ 75.0 │ 4534.4 │ 12288.0 │ 36.9 │ 4534.4 │ 4534.4 │
└────────┴────────────┴───────────────┴────────────────┴───────┴──────────────┴──────────────┘
pynvml>=12.0.0,<13.0.0
rich>=13.0.0
click>=8.1.7
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment