Skip to content

Instantly share code, notes, and snippets.

@iamgroot42
Last active May 29, 2025 00:13
Show Gist options
  • Save iamgroot42/5f1f33e39621e545c621e90472b649d3 to your computer and use it in GitHub Desktop.
Save iamgroot42/5f1f33e39621e545c621e90472b649d3 to your computer and use it in GitHub Desktop.
Utility wrapper to monitor GPU usage for experiments
#!/usr/bin/env bash
#
# Run a command in the background while sampling the GPU memory it uses,
# then print a per-GPU summary.
#
# Usage: ./blink.sh [-i interval] your_command_here args...
#   -i interval  Sampling interval in seconds (default: 1.0)
#   -h           Show help and exit

# Color definitions (kept as literal backslash escapes; they are expanded
# later by `echo -e` / `printf %b`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
WHITE='\033[0;37m'
BOLD='\033[1m'
RESET='\033[0m'

INTERVAL=1.0 # Default sampling interval in seconds

# Print the command-line help text to stdout.
usage() {
  echo "Usage: $0 [-i interval] command [args...]"
  echo " -i interval Sampling interval in seconds (default: 1.0)"
  echo " -h Show this help"
}

# Succeed only when $1 is a plain decimal number strictly greater than zero.
# Pure bash on purpose: the check must not silently pass when `bc` is missing.
is_positive_number() {
  local number_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
  # A well-formed non-negative decimal is > 0 exactly when it has a non-zero digit.
  [[ $1 =~ $number_re && $1 == *[1-9]* ]]
}

# Parse command line options.
# The leading ':' selects getopts' silent mode so that $OPTARG carries the
# offending option letter and the messages below are accurate.
while getopts ":i:h" opt; do
  case "$opt" in
    i)
      INTERVAL=$OPTARG
      ;;
    h)
      usage
      exit 0
      ;;
    :)
      echo "Option -$OPTARG requires an argument" >&2
      exit 1
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))

# Validate interval
if ! is_positive_number "$INTERVAL"; then
  echo "Error: Interval must be a positive number" >&2
  exit 1
fi

# Create the sample log only after every early-exit path above, so that
# -h / bad options / bad intervals do not leave a stray temp file behind.
LOG=$(mktemp) || {
  echo "Error: could not create temporary log file" >&2
  exit 1
}
# Stop the monitored command and the sampler (when they have been started)
# and delete the sample log. Safe to call before either PID is set and safe
# to call more than once.
cleanup() {
  local -a pids=()
  [[ -n ${CMD_PID:-} ]] && pids+=("$CMD_PID")
  [[ -n ${SAMP_PID:-} ]] && pids+=("$SAMP_PID")
  if ((${#pids[@]} > 0)); then
    # Errors are expected here when the processes have already exited.
    kill "${pids[@]}" 2>/dev/null
    wait "${pids[@]}" 2>/dev/null
  fi
  [[ -n ${LOG:-} ]] && rm -f -- "$LOG"
  return 0
}

# Print the start-up banner; the arguments are the command about to be run.
print_banner() {
  # Clear some space and add newline
  echo -e "\n\n${CYAN}${BOLD}🚀 Starting GPU Monitor${RESET}"
  # %s keeps backslashes in the user's command literal; only the color
  # variables go through %b.
  printf '%b%s%b\n' "${WHITE}Command: ${YELLOW}" "$*" "${RESET}"
  echo -e "${WHITE}Sampling: Every ${YELLOW}${INTERVAL}s${RESET}"
  echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n"
}

# cleanup runs exactly once, from the EXIT trap; the signal traps only turn
# the signal into an exit with the conventional 128+signal status.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

print_banner "$@"
# Map each GPU UUID reported by nvidia-smi to its position in that listing
# (first line -> 0, second line -> 1, ...).
declare -A GPU_INDEX_MAP
gpu_index=0
# `read` keeps the first whitespace-delimited field in uuid and discards the rest.
while read -r uuid _; do
  GPU_INDEX_MAP[$uuid]=$gpu_index
  gpu_index=$((gpu_index + 1))
done < <(nvidia-smi --query-gpu=gpu_uuid --format=csv,noheader,nounits)
# Print one "timestamp,<nvidia-smi row>" line for every compute-app row whose
# PID field is exactly $1; prints nothing when no row matches.
# The PID is compared as a whole field: a prefix match would also pick up
# unrelated processes (e.g. 1234 when monitoring 123).
# NOTE(review): only the PID passed in is matched; GPU work done by child
# processes of the command is not attributed to it.
sample_gpu_memory() {
  local pid=$1 timestamp
  timestamp=$(date +%s)
  nvidia-smi --query-compute-apps=pid,gpu_uuid,used_gpu_memory --format=csv,noheader,nounits \
    | awk -F, -v pid="$pid" -v ts="$timestamp" '
        { row_pid = $1; gsub(/[[:space:]]/, "", row_pid) }
        row_pid == pid { print ts "," $0 }'
}

# Start command in background (NO COLOR INTERFERENCE)
"$@" &
CMD_PID=$!

# Background GPU usage logger (memory only): append one sample per interval
# to $LOG for as long as the command is alive.
{
  while kill -0 "$CMD_PID" 2>/dev/null; do
    sample_gpu_memory "$CMD_PID" >> "$LOG"
    sleep "$INTERVAL"
  done
} &
SAMP_PID=$!

# Wait for the command, remember its exit status, then stop the sampler.
wait "$CMD_PID"
RET=$?
kill "$SAMP_PID" 2>/dev/null
wait "$SAMP_PID" 2>/dev/null
# Parse and summarize
declare -A GPU_MEM      # GPU UUID -> space-separated memory samples
declare -A GPU_TIMELINE # GPU UUID -> space-separated "timestamp:memory" entries

# Load a sampler log into GPU_MEM and GPU_TIMELINE.
# Arguments: $1 - path to a log of "timestamp,pid,gpu_uuid,memory" lines
# Rows whose memory field is not a plain integer (for example "[N/A]") are
# skipped so that the arithmetic in the summary never sees them.
# A missing or unreadable log is treated as "no samples".
parse_gpu_log() {
  local log_file=$1 timestamp pid gpu mem
  [[ -r $log_file ]] || return 0
  while IFS=, read -r timestamp pid gpu mem; do
    # Trim with parameter expansion: no subprocesses and no globbing of the value.
    gpu=${gpu//[[:space:]]/}
    mem=${mem//[[:space:]]/}
    mem=${mem%MiB} # tolerate a unit suffix in case one is reported
    [[ -n $gpu && $mem =~ ^[0-9]+$ ]] || continue
    GPU_MEM[$gpu]+="$mem "
    GPU_TIMELINE[$gpu]+="$timestamp:$mem "
  done < "$log_file"
}

parse_gpu_log "$LOG"
# Function to draw time-series graph
#
# Renders a bar chart of memory samples on stdout: 8 rows tall and at most
# 60 columns wide, followed by an axis line and a caption.
# Globals:   CYAN, GREEN, RESET, INTERVAL (read)
# Arguments: $1 - space-separated "timestamp:MiB" entries
#            $2 - peak value in MiB, used as the top of the y-axis
draw_timeline() {
  # Intentionally unquoted: $1 is a space-separated list that must be split.
  # shellcheck disable=SC2206
  local -a timeline=($1)
  local max=$2
  local height=8
  local width=60
  local entry val h i

  # Extract values (everything after the first ':' of each entry)
  local -a values=()
  for entry in "${timeline[@]}"; do
    values+=("${entry#*:}")
  done
  local count=${#values[@]}

  # Sample if too many data points. The stride is rounded UP so the number
  # of plotted columns can never exceed the width.
  local step=1
  if ((count > width)); then
    step=$(((count + width - 1) / width))
  fi
  local columns=$(((count + step - 1) / step))

  # A peak of 0 would otherwise divide by zero when scaling the bars.
  local scale=$max
  ((scale > 0)) || scale=1

  # Draw graph, top row first
  echo -e "\n ${CYAN}Memory Usage Over Time:${RESET}"
  for ((h = height; h > 0; h--)); do
    printf ' %4d MiB ' $((max * h / height))
    for ((i = 0; i < count; i += step)); do
      val=${values[i]}
      if ((val * height / scale >= h)); then
        echo -ne "${GREEN}█${RESET}"
      else
        echo -n " "
      fi
    done
    echo
  done

  # Axis line: one dash per plotted column, indented to match the row labels
  printf ' %8s ' ""
  for ((i = 0; i < columns; i++)); do
    echo -n "─"
  done
  echo
  echo -e " ${CYAN}Time →${RESET} (${count} samples @ ${INTERVAL}s intervals)"
}
# Print "max avg min" for the integer samples given as arguments.
# The average uses integer division. Min and max are seeded from the first
# sample rather than from magic sentinel values.
mem_stats() {
  (($# > 0)) || return 1
  local sample max=$1 min=$1 sum=0
  for sample in "$@"; do
    ((sample > max)) && max=$sample
    ((sample < min)) && min=$sample
    ((sum += sample))
  done
  echo "$max $((sum / $#)) $min"
}

# Print (sample count x interval) with one decimal place.
# Arguments: $1 - number of samples, $2 - sampling interval in seconds
# Uses awk rather than bc so the figure is not blank on systems without bc.
sample_duration() {
  LC_ALL=C awk -v n="$1" -v dt="$2" 'BEGIN { printf "%.1f", n * dt }'
}

echo -e "\n${PURPLE}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
echo -e "${WHITE}${BOLD}📊 GPU Usage Summary${RESET}"
echo -e "${PURPLE}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n"
if ((${#GPU_MEM[@]} == 0)); then
  echo -e "${YELLOW}⚠️ No GPU usage detected${RESET}"
else
  # Associative-array keys come back in no particular order; report GPUs by
  # ascending index, with UUIDs that have no known index last.
  mapfile -t gpu_order < <(
    for gpu in "${!GPU_MEM[@]}"; do
      printf '%s\t%s\n' "${GPU_INDEX_MAP[$gpu]:-999999}" "$gpu"
    done | sort -n | cut -f2-
  )
  for gpu in "${gpu_order[@]}"; do
    read -ra mems <<<"${GPU_MEM[$gpu]}"
    read -r max avg min < <(mem_stats "${mems[@]}")
    # Get GPU index from UUID; fall back to the UUID itself when it is unknown
    gpu_idx=${GPU_INDEX_MAP[$gpu]:-$gpu}
    # GPU name with icon
    echo -e "${BLUE}${BOLD}🎮 GPU:${gpu_idx}${RESET}"
    echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
    # Stats
    echo -e " ${GREEN}▲${RESET} Peak Memory: ${BOLD}${RED}$max MiB${RESET}"
    echo -e " ${YELLOW}◆${RESET} Avg Memory: ${BOLD}${YELLOW}$avg MiB${RESET}"
    echo -e " ${CYAN}▼${RESET} Min Memory: ${BOLD}${CYAN}$min MiB${RESET}"
    echo -e " ${PURPLE}📊${RESET} Duration: ${BOLD}$(sample_duration "${#mems[@]}" "$INTERVAL")s${RESET}"
    # Time series graph
    draw_timeline "${GPU_TIMELINE[$gpu]}" "$max"
    echo
  done
fi
echo -e "${PURPLE}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
# Exit status: report how the monitored command finished and exit with the
# same status (RET holds the status captured from `wait` on the command).
if ((RET == 0)); then
  echo -e "${GREEN}${BOLD}✅ Command completed successfully${RESET}"
else
  echo -e "${RED}${BOLD}❌ Command failed with exit code: $RET${RESET}"
fi
exit "$RET"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment