Last active
January 13, 2024 22:04
-
-
Save sghael/66a6a004e9658244465dec2eafbd301b to your computer and use it in GitHub Desktop.
Script to work around the PyTorch warning `Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.) return torch._C._cuda_getDeviceCount() > 0` that appears after resuming a Linux machine from suspend. I am using this on Ubuntu 22.04 with NVIDIA driver 545 and CUDA 12.3.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Function to find and kill gnome and xorg processes using the NVIDIA GPU.
#
# Reads the process table printed by `nvidia-smi`, keeps the rows that mention
# "gnome" or "xorg", and sends the default signal (SIGTERM) to each PID found.
# Globals:   none
# Arguments: none
# Outputs:   one "Killing process with PID <pid>" line per process on stdout;
#            warnings on stderr
# Returns:   0 always (best-effort: problems are reported, not fatal)
kill_gpu_processes() {
  local smi_output gpu_processes pid

  # Assigned separately from `local` so a failing nvidia-smi is not masked.
  if ! smi_output=$(nvidia-smi); then
    echo "Warning: nvidia-smi failed; no GPU processes were killed" >&2
    return 0
  fi

  # Find Gnome and Xorg processes using the NVIDIA GPU.
  # The PID is taken as the numeric field immediately before the Type column
  # (G, C or C+G) instead of a fixed column number, because the number of
  # columns preceding the PID is not guaranteed to be stable.
  gpu_processes=$(printf '%s\n' "$smi_output" | awk '
    /gnome|xorg/ {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^(G|C|C\+G)$/ && $(i - 1) ~ /^[0-9]+$/) {
          print $(i - 1)
          break
        }
      }
    }')

  # Kill the processes gracefully.
  # Word splitting is intentional here: gpu_processes only ever holds
  # newline-separated numeric PIDs.
  for pid in $gpu_processes; do
    echo "Killing process with PID $pid"
    kill "$pid" || echo "Warning: failed to kill process with PID $pid" >&2
  done

  return 0
}
# Function to unload and load the NVIDIA kernel modules.
#
# Removes the nvidia_uvm module with `modprobe -r` and inserts it again, both
# through sudo.
# Globals:   none
# Arguments: none
# Outputs:   progress messages on stdout; warnings/errors on stderr
# Returns:   0 if both the unload and the load succeeded, 1 otherwise
reload_nvidia_modules() {
  local status=0

  echo "Unloading nvidia_uvm module"
  if ! sudo modprobe -r nvidia_uvm; then
    # Keep going: the load below is still attempted, but the failure is
    # reported so the caller does not mistake a no-op for a real reload.
    echo "Warning: failed to unload nvidia_uvm module" >&2
    status=1
  fi

  echo "Loading nvidia_uvm module"
  if ! sudo modprobe nvidia_uvm; then
    echo "Error: failed to load nvidia_uvm module" >&2
    status=1
  fi

  return "$status"
}
# Script entry point: run the two steps in order — first the GPU process
# cleanup, then the kernel module reload.
main() {
  kill_gpu_processes
  reload_nvidia_modules
}

main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment