(Note: the driver version pinned below may be outdated — check NVIDIA's repository for the current release before following these steps.)
Note: the better solution would be to use the NVIDIA gpu-operator.
The two approaches are mutually exclusive: either install the driver via the gpu-operator (with driver: {enabled: true} in the Helm values), or manually on your nodes as shown below.
# This worked on a VM with a RTX 50xx GPU attached (in proxmox, with PCIe Passthrough)
# Fetch NVIDIA's CUDA apt keyring; the $(...) turns the Ubuntu release (e.g. "24.04")
# into the repo path component NVIDIA uses (e.g. "ubuntu2404")
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu$(grep DISTRIB_RELEASE /etc/lsb-release | cut -d'=' -f2 | tr -d '.')/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt update
sudo apt install cuda-toolkit # nvcc compiler + CUDA libraries (no driver)
#sudo apt install nvidia-gds # GPUDirect Storage -- optional, intentionally disabled
# Headless driver (no X/desktop components) with the open kernel modules, pinned to 570
# -- bump the version number when a newer branch is current
sudo apt install nvidia-headless-570-open nvidia-utils-570
reboot # required so the new kernel modules are loaded
nvidia-smi # should work now
### if you're on a B200, install infiniband stuff: --- Don't install this on a non-NVlink system!
sudo apt install nvidia-fabricmanager nvlsm infiniband-diags # infiniband-diags presumably only needed on 22.04 -- TODO confirm
# Make the ib_umad kernel module load on every boot (idempotent: only append if missing).
# grep -qs: -q suppresses printing the matched line, -s suppresses the error when
# modules.conf does not exist yet (a missing file simply counts as "not found").
if ! grep -qs ib_umad /etc/modules-load.d/modules.conf; then echo ib_umad | sudo tee -a /etc/modules-load.d/modules.conf; fi
sudo modprobe ib_umad # load it now, without waiting for a reboot
sudo systemctl enable --now nvidia-fabricmanager # fabric manager must run for NVSwitch/NVLink bring-up
nvidia-smi topo -m # View GPU interconnect topology -- should work now
### (end infiniband)
# Container integration: lets containerd (and therefore k3s) hand GPUs to containers
sudo apt install -y nvidia-container-runtime nvidia-container-toolkit
# Example cuda test file (you can find the file below)
# Compile and run a minimal CUDA program to verify the toolkit + driver end-to-end
/usr/local/cuda/bin/nvcc cuda_test.cu -o cuda_test
./cuda_test
# Success! Found 1 CUDA devices.
# Activate GPUs in k3s
# Restart so k3s re-detects the nvidia runtime and regenerates its containerd config
sudo systemctl restart k3s
sudo grep nvidia /var/lib/rancher/k3s/agent/etc/containerd/config.toml # check, should not return empty
# NOTE(review): k3s auto-generates its containerd config, so this step is presumably
# redundant -- confirm before relying on it
sudo nvidia-ctk runtime configure --runtime=containerd # (not necessary)?
# The only difference is that k3s doesn't set "privileged_without_host_devices=false" -- only relevant for privileged containers
# Optional: Test in containerd (not really needed)
sudo ctr images pull docker.io/nvidia/cuda:12.9.1-base-ubuntu24.04
sudo ctr run --rm --gpus 0 -t docker.io/nvidia/cuda:12.9.1-base-ubuntu24.04 cuda-12.9.1-base-ubuntu24.04 nvidia-smi