Skip to content

Instantly share code, notes, and snippets.

@airtonix
Created July 14, 2024 07:24

Revisions

  1. airtonix created this gist Jul 14, 2024.
    51 changes: 51 additions & 0 deletions restart-nvidia.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,51 @@
    #!/bin/bash

    set -e

    # Print the compute (CUDA) processes that nvidia-smi currently reports.
    # Output: one CSV row per process with no header row, e.g.
    #   1234, process_name
    # The exit status is whatever nvidia-smi returns.
    get_cuda_processes() {
      local -a smi_args=(
        --query-compute-apps=pid,name
        --format=csv,noheader
      )
      nvidia-smi "${smi_args[@]}"
    }

    # Send SIGKILL (via sudo) to every process listed by get_cuda_processes.
    #
    # get_cuda_processes emits CSV rows such as "1234, process_name", so the
    # row is split on commas and spaces: a plain whitespace split would leave
    # the trailing comma attached to the PID ("1234,") and kill would reject it.
    #
    # Rows whose first field is not a plain number (blank lines, unexpected
    # text) are skipped. Every PID is attempted even if an earlier kill fails,
    # for example because the process already exited.
    #
    # Returns: 0 if every kill succeeded (or there was nothing to kill),
    #          1 if at least one kill failed.
    kill_cuda_processes() {
      local pid status=0

      while IFS=', ' read -r pid _; do
        # Ignore anything that is not a numeric PID.
        [[ "$pid" =~ ^[0-9]+$ ]] || continue

        if ! sudo kill -9 "$pid"; then
          printf 'failed to kill PID %s\n' "$pid" >&2
          status=1
        fi
      done < <(get_cuda_processes)

      return "$status"
    }

    # Reload the nvidia_uvm kernel module: unload it with rmmod, then load it
    # again with modprobe (both through sudo).
    # Both steps are best-effort; a failure of either one is ignored, so this
    # function always returns 0.
    restart_nvidia() {
      local module_cmd
      for module_cmd in rmmod modprobe; do
        sudo "$module_cmd" nvidia_uvm || true
      done
    }

    # Command dispatch: the first argument selects the action.
    #   list    - print "PID: <pid> Name: <name>" for each CUDA process
    #   kill    - kill every CUDA process
    #   restart - reload the nvidia_uvm kernel module
    # Anything else prints the usage text to stderr and exits with status 1.
    case "${1:-}" in
      list)
        # Rows look like "1234, process_name". Splitting on comma/space puts
        # the PID in the first variable and the whole remainder in the second,
        # so the name is kept intact even if it contains commas or spaces.
        while IFS=', ' read -r pid name; do
          [[ -n "$pid" ]] || continue
          printf 'PID: %s Name: %s\n' "$pid" "$name"
        done < <(get_cuda_processes)
        ;;
      kill)
        kill_cuda_processes
        ;;
      restart)
        restart_nvidia
        ;;
      *)
        echo "Usage: $0 {list|kill|restart}" >&2
        exit 1
        ;;
    esac