Last active
May 29, 2025 00:13
-
-
Save iamgroot42/5f1f33e39621e545c621e90472b649d3 to your computer and use it in GitHub Desktop.
Utility wrapper to monitor GPU usage for experiments
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Runs a command in the background and samples its GPU memory usage with
# nvidia-smi, then prints a per-GPU summary.
#
# Usage: ./blink.sh [-i interval] your_command_here args...
#   -i interval   Sampling interval in seconds (default: 1.0)
#   -h            Show help and exit

# Color definitions.
# Stored with a literal "\033"; they only become escape sequences when
# printed through `echo -e` or printf's %b.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
WHITE='\033[0;37m'
BOLD='\033[1m'
RESET='\033[0m'

INTERVAL=1.0 # Default sampling interval in seconds

# is_positive_number VALUE
# Returns 0 when VALUE is written as digits, an optional ".", and optional
# further digits, and contains at least one non-zero digit (i.e. is > 0).
# Pure bash, so the check does not silently pass when `bc` is not installed.
is_positive_number() {
  [[ "$1" =~ ^[0-9]+\.?[0-9]*$ && "$1" =~ [1-9] ]]
}

# Parse command line options.
# The leading ":" puts getopts in silent mode so a missing option argument
# is reported by the ":" arm instead of as an "invalid option".
while getopts ":i:h" opt; do
  case "$opt" in
    i)
      INTERVAL=$OPTARG
      ;;
    h)
      echo "Usage: $0 [-i interval] command [args...]"
      echo " -i interval Sampling interval in seconds (default: 1.0)"
      echo " -h Show this help"
      exit 0
      ;;
    :)
      echo "Error: option -$OPTARG requires an argument" >&2
      exit 1
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
# Drop the parsed options; what remains in "$@" is the command to run.
shift $((OPTIND - 1))

# Validate interval
if ! is_positive_number "$INTERVAL"; then
  echo "Error: Interval must be a positive number" >&2
  exit 1
fi

# Temporary file that receives the raw samples. Created only after every
# early-exit path above, so -h and usage errors do not leave a file behind.
LOG=$(mktemp) || {
  echo "Error: could not create temporary log file" >&2
  exit 1
}
# cleanup: stop the monitored command and the sampler (when they have been
# started) and delete the temporary log. Installed as the EXIT handler, so
# it runs on every exit path, including the signal exits below.
cleanup() {
  local -a pids=()
  [[ -n "${CMD_PID:-}" ]] && pids+=("$CMD_PID")
  [[ -n "${SAMP_PID:-}" ]] && pids+=("$SAMP_PID")
  if (( ${#pids[@]} > 0 )); then
    # Errors are expected (and ignored) when a process has already finished.
    kill "${pids[@]}" 2>/dev/null
    wait "${pids[@]}" 2>/dev/null
  fi
  [[ -n "${LOG:-}" ]] && rm -f -- "$LOG"
  return 0
}
trap cleanup EXIT
# Conventional 128+signal exit codes; exiting triggers the EXIT handler.
trap 'exit 130' INT
trap 'exit 143' TERM

# Nothing to monitor without a command.
if (( $# == 0 )); then
  echo "Usage: $0 [-i interval] command [args...]" >&2
  exit 1
fi

# Clear some space and add newline
# NOTE(review): the emoji and rule glyphs were mis-encoded in the copy this
# was restored from; the ones used here are a best guess -- confirm.
echo -e "\n\n${CYAN}${BOLD}🚀 Starting GPU Monitor${RESET}"
# The command is printed with %s so backslashes in its arguments are shown
# verbatim instead of being interpreted as escape sequences.
printf '%b%s%b\n' "${WHITE}Command: ${YELLOW}" "$*" "${RESET}"
echo -e "${WHITE}Sampling: Every ${YELLOW}${INTERVAL}s${RESET}"
echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n"
# Create GPU UUID to index mapping
# GPU_INDEX_MAP[uuid] holds the zero-based position of that UUID in the
# listing printed by `nvidia-smi --query-gpu=gpu_uuid`.
declare -A GPU_INDEX_MAP

# load_gpu_index_map
# Reads one GPU UUID per line from stdin and records it in GPU_INDEX_MAP
# with a running index. Only the first whitespace-separated word of each
# line is used; blank lines are skipped and do not consume an index.
load_gpu_index_map() {
  local uuid _rest
  local idx=0
  # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
  while read -r uuid _rest || [[ -n "$uuid" ]]; do
    [[ -n "$uuid" ]] || continue
    GPU_INDEX_MAP[$uuid]=$idx
    idx=$((idx + 1))
  done
}

load_gpu_index_map < <(nvidia-smi --query-gpu=gpu_uuid --format=csv,noheader,nounits)
# filter_samples_for_pid PID TIMESTAMP
# Reads CSV rows whose first field is a process id (the rows printed by
# `nvidia-smi --query-compute-apps=pid,...`) from stdin and prints only the
# rows whose pid equals PID exactly, each prefixed with "TIMESTAMP,".
# The whole first field is compared, so pid 123 does not also select 1234.
filter_samples_for_pid() {
  awk -F',' -v pid="$1" -v ts="$2" '
    {
      p = $1
      gsub(/[[:space:]]/, "", p)
    }
    (p "") == (pid "") { print ts "," $0 }
  '
}

# Start command in background (NO COLOR INTERFERENCE)
"$@" &
CMD_PID=$!

# Background GPU usage logger (memory only).
# Appends one "timestamp,pid, gpu_uuid, used_memory" row per GPU to $LOG
# every $INTERVAL seconds for as long as the command is alive.
{
  while kill -0 "$CMD_PID" 2>/dev/null; do
    timestamp=$(date +%s)
    nvidia-smi --query-compute-apps=pid,gpu_uuid,used_gpu_memory --format=csv,noheader,nounits \
      | filter_samples_for_pid "$CMD_PID" "$timestamp" >> "$LOG"
    sleep "$INTERVAL"
  done
} &
SAMP_PID=$!

# Wait for the command and remember its exit status, then stop the sampler.
wait "$CMD_PID"
RET=$?
kill "$SAMP_PID" 2>/dev/null
wait "$SAMP_PID" 2>/dev/null

# Both processes are reaped; clear the pids so that exit-time cleanup cannot
# signal an unrelated process that later reuses one of these pid numbers.
CMD_PID=
SAMP_PID=
# Parse and summarize
declare -A GPU_MEM      # gpu uuid -> space-separated memory samples
declare -A GPU_TIMELINE # gpu uuid -> space-separated "timestamp:memory" entries

# load_gpu_samples FILE
# Reads "timestamp,pid,gpu,mem" rows from FILE and appends each sample to
# GPU_MEM and GPU_TIMELINE, keyed by the whitespace-stripped gpu field.
# Rows with an empty gpu field or a non-integer mem field are skipped so
# that later arithmetic on the samples cannot fail. A missing or unreadable
# FILE is treated as "no samples".
load_gpu_samples() {
  local file=$1
  local timestamp pid gpu mem key val
  [[ -r "$file" ]] || return 0
  # `|| [[ -n ... ]]` keeps a final row that lacks a trailing newline.
  while IFS=, read -r timestamp pid gpu mem || [[ -n "$timestamp" ]]; do
    # Strip whitespace with parameter expansion instead of forking xargs.
    key=${gpu//[[:space:]]/}
    val=${mem//[[:space:]]/}
    [[ -n "$key" && "$val" =~ ^[0-9]+$ ]] || continue
    GPU_MEM[$key]+="$val "
    GPU_TIMELINE[$key]+="$timestamp:$val "
  done < "$file"
}

load_gpu_samples "$LOG"
# Function to draw time-series graph
#
# draw_timeline TIMELINE MAX
#   TIMELINE  one string of space-separated "timestamp:value" entries
#   MAX       largest value; the top row of the chart corresponds to it
# Globals read: CYAN, GREEN, RESET (colors), INTERVAL (caption only).
# Prints an 8-row bar chart, at most 60 columns wide, to stdout. When there
# are more samples than columns, every step-th sample is plotted.
draw_timeline() {
  local -a timeline=()
  # Split on whitespace without subjecting the entries to globbing.
  read -r -a timeline <<< "$1"
  local max=${2:-0}
  local height=8
  local width=60
  local entry val h i

  # Extract values (the part after the first ":")
  local -a values=()
  for entry in "${timeline[@]}"; do
    values+=("${entry#*:}")
  done
  local count=${#values[@]}

  # Sample if too many data points. Ceiling division keeps the number of
  # plotted columns at or below the width.
  local step=1
  if (( count > width )); then
    step=$(( (count + width - 1) / width ))
  fi
  local columns=0
  if (( count > 0 )); then
    columns=$(( (count + step - 1) / step ))
  fi

  # A peak of 0 would make the bar-height division below divide by zero;
  # scale against 1 instead, which leaves every bar empty.
  local scale=$max
  (( scale > 0 )) || scale=1

  # Draw graph
  # NOTE(review): the bar, axis and arrow glyphs were mis-encoded in the
  # copy this was restored from; the ones used here are a best guess.
  echo -e "\n ${CYAN}Memory Usage Over Time:${RESET}"
  for ((h = height; h > 0; h--)); do
    echo -ne " "
    printf "%4d MiB " "$((max * h / height))"
    for ((i = 0; i < count; i += step)); do
      val=${values[i]}
      if (( val * height / scale >= h )); then
        echo -ne "${GREEN}█${RESET}"
      else
        echo -ne " "
      fi
    done
    echo
  done

  # Axis: exactly one segment per plotted column.
  echo -ne " "
  printf "%8s " ""
  for ((i = 0; i < columns; i++)); do echo -ne "─"; done
  echo
  echo -e " ${CYAN}Time →${RESET} (${count} samples @ ${INTERVAL}s intervals)"
}
# mem_stats SAMPLE...
# Prints "peak average minimum" for the integer samples given as arguments.
# The average is an integer (truncating division). Peak and minimum start
# from the first sample rather than from fixed sentinels, so no value is too
# large to be reported correctly. Prints "0 0 0" when called with no samples.
mem_stats() {
  if (( $# == 0 )); then
    echo "0 0 0"
    return 0
  fi
  local m peak=$1 low=$1 total=0
  for m in "$@"; do
    (( m > peak )) && peak=$m
    (( m < low )) && low=$m
    total=$((total + m))
  done
  echo "$peak $((total / $#)) $low"
}

# Horizontal rules, 40 characters wide.
# NOTE(review): the rule glyphs, emoji and stat markers in this section were
# mis-encoded in the copy this was restored from; the ones used here are a
# best guess -- confirm against the original script.
printf -v heavy_rule '%*s' 40 ''
heavy_rule=${heavy_rule// /━}
printf -v light_rule '%*s' 40 ''
light_rule=${light_rule// /─}

echo -e "\n${PURPLE}${BOLD}${heavy_rule}${RESET}"
echo -e "${WHITE}${BOLD}📊 GPU Usage Summary${RESET}"
echo -e "${PURPLE}${BOLD}${heavy_rule}${RESET}\n"

if (( ${#GPU_MEM[@]} == 0 )); then
  echo -e "${YELLOW}⚠️ No GPU usage detected${RESET}"
else
  for gpu in "${!GPU_MEM[@]}"; do
    # Split the stored sample strings on whitespace without globbing.
    read -r -a mems <<< "${GPU_MEM[$gpu]}"
    read -r -a timeline <<< "${GPU_TIMELINE[$gpu]}"
    read -r max avg min <<< "$(mem_stats "${mems[@]}")"

    # Get GPU index from UUID; fall back to the UUID itself when the GPU
    # is not in the index map so the heading is never blank.
    gpu_idx=${GPU_INDEX_MAP[$gpu]:-$gpu}

    # GPU name with icon
    echo -e "${BLUE}${BOLD}🎮 GPU:${gpu_idx}${RESET}"
    echo -e "${BLUE}${light_rule}${RESET}"

    # Stats
    echo -e " ${GREEN}▲${RESET} Peak Memory: ${BOLD}${RED}$max MiB${RESET}"
    echo -e " ${YELLOW}●${RESET} Avg Memory: ${BOLD}${YELLOW}$avg MiB${RESET}"
    echo -e " ${CYAN}▼${RESET} Min Memory: ${BOLD}${CYAN}$min MiB${RESET}"
    # Duration is estimated as (number of samples) x (sampling interval).
    echo -e " ${PURPLE}🕐${RESET} Duration: ${BOLD}$(echo "scale=1; ${#mems[@]} * $INTERVAL" | bc)s${RESET}"

    # Time series graph
    draw_timeline "${timeline[*]}" "$max"
    echo
  done
fi
echo -e "${PURPLE}${BOLD}${heavy_rule}${RESET}"
# Exit status
# Report whether the monitored command succeeded and exit with its status
# (RET), so callers of this wrapper see the same exit code they would have
# seen from the command itself. An unset RET is treated as a failure.
# NOTE(review): the check/cross glyphs were mis-encoded in the copy this was
# restored from; the ones used here are a best guess -- confirm.
if [[ "${RET:-1}" -eq 0 ]]; then
  echo -e "${GREEN}${BOLD}✅ Command completed successfully${RESET}"
else
  echo -e "${RED}${BOLD}❌ Command failed with exit code: $RET${RESET}"
fi
exit "${RET:-1}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment