Running microk8s enable gpu didn't start the GPU operator correctly for me. Here's how I tracked it down and fixed it.
First, check that the NVIDIA container toolkit is installed on the host and can see the driver:
dpkg -l | grep nvidia-container
nvidia-container-cli info
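If nvidia-container-cli errors out, confirm the driver itself is healthy before going any further:
nvidia-smi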
Then check what the cluster sees: whether the node advertises nvidia.com/gpu, and whether the operator pods are actually running:
microk8s kubectl describe node | grep -i gpu
microk8s kubectl get pods -A | grep nvidia
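The operator also reports overall state on its ClusterPolicy resource, which should eventually say ready (field name per the GPU operator's CRD; fall back to describe if this jsonpath doesn't match your version):
microk8s kubectl get clusterpolicy cluster-policy -o jsonpath='{.status.state}'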
For me the interesting logs were the toolkit and validator pods (substitute the real pod names from the get pods output above; the XXXXX suffixes are placeholders):
microk8s kubectl logs -n gpu-operator-resources nvidia-container-toolkit-daemonset-XXXXX --all-containers
microk8s kubectl logs -n gpu-operator-resources nvidia-operator-validator-XXXXX --all-containers
The validator was failing because the /dev/char symlinks for the NVIDIA devices were missing. Compare what exists on both sides:
ls -l /dev/nvidia*
ls -l /dev/char/ | grep nvidia
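The symlink names are major:minor device numbers. The majors come from the kernel, so confirm them before creating anything (195 is the usual nvidia major; nvidia-uvm's major is allocated dynamically and varies between machines):
grep nvidia /proc/devices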
Create the missing symlinks by hand, using the major:minor pairs from your own ls -l /dev/nvidia* output. These are from my machine; the 235 major for nvidia-uvm in particular may differ on yours:
sudo ln -s /dev/nvidiactl /dev/char/195:255
sudo ln -s /dev/nvidia-modeset /dev/char/195:254
sudo ln -s /dev/nvidia0 /dev/char/195:0
sudo ln -s /dev/nvidia-uvm /dev/char/235:0
sudo ln -s /dev/nvidia-uvm-tools /dev/char/235:1
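Recent nvidia-container-toolkit releases ship nvidia-ctk, which can create all of these symlinks for you; if the dpkg output earlier showed a new enough version, this is less error-prone than manual ln:
sudo nvidia-ctk system create-dev-char-symlinks --create-all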
Then tell the operator's validator to stop trying to create the dev-char symlinks itself, since we've now done it manually:
microk8s kubectl patch clusterpolicy cluster-policy --type merge -p '{"spec":{"validator":{"driver":{"env":[{"name":"DISABLE_DEV_CHAR_SYMLINK_CREATION","value":"true"}]}}}}'
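Patching the ClusterPolicy should roll the validator pods; if they don't restart on their own, delete them so the checks re-run (the app label here matches the default GPU operator deployment; verify with --show-labels if it selects nothing):
microk8s kubectl delete pod -n gpu-operator-resources -l app=nvidia-operator-validator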
Separately, dcgm-exporter wasn't coming up for me, so I checked which image it wanted and pulled it manually. MicroK8s runs its own containerd, so go through microk8s ctr rather than the host's ctr:
microk8s kubectl describe pod -n gpu-operator-resources nvidia-dcgm-exporter-XXXXX | grep -i "image:"
sudo microk8s ctr -n k8s.io image pull nvcr.io/nvidia/k8s/dcgm-exporter:3.3.0-3.2.0-ubuntu22.04
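Confirm the image landed in microk8s's containerd store:
sudo microk8s ctr -n k8s.io image ls | grep dcgm-exporter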
If microk8s commands fail with permission errors along the way, add yourself to the microk8s group (newgrp applies it to the current shell without logging out):
sudo usermod -aG microk8s $USER
newgrp microk8s
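Check that the group membership took effect:
id -nG | grep -w microk8s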
Finally, run a throwaway pod and check that the GPU is visible from inside it:
microk8s kubectl run gpu-test --rm -it --restart=Never --image=nvidia/cuda:12.2.0-runtime-ubuntu22.04
Inside the pod, run nvidia-smi; if it lists your GPU, everything is working.
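Note that kubectl run as written doesn't actually request a GPU, so it mostly proves the runtime wiring. A stricter test is a pod that explicitly asks the scheduler for nvidia.com/gpu; a minimal sketch (the pod name is arbitrary):
microk8s kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: gpu-test-strict
spec:
  restartPolicy: Never
  containers:
  - name: cuda
    image: nvidia/cuda:12.2.0-runtime-ubuntu22.04
    command: ["nvidia-smi"]
    resources:
      limits:
        nvidia.com/gpu: 1   # forces scheduling onto a node with an allocatable GPU
EOF
Once it completes, the nvidia-smi output is in the pod logs:
microk8s kubectl logs gpu-test-strict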