Typically installing K3s is a one-liner. However, because we need to enable Nvidia GPU support, we'll need a few extra steps.
First, run the following to download and install K3s. INSTALL_K3S_SKIP_START=true skips starting K3s automatically, because we still need to point containerd at the nvidia-container-runtime
(more on that later). INSTALL_K3S_EXEC="server --no-deploy traefik"
skips Traefik, because Kubeflow uses Istio; if Traefik were installed, Istio wouldn't initialise properly:
curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_EXEC="server --no-deploy traefik" sh
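If the script finishes without errors, the k3s binary is installed but not yet running. As an optional sanity check:
k3s --version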
Copy the kubeconfig into your home directory and create the directory that will hold the containerd config template:
mkdir -p ~/.kube
sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
sudo chmod 644 ~/.kube/config
sudo mkdir -p /var/lib/rancher/k3s/agent/etc/containerd/
Switch to the root user:
sudo su -
As root, create the containerd config template. This is the stock K3s template with two GPU-related changes, marked with comments below:
cat <<"EOF" > /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl
[plugins.opt]
  path = "{{ .NodeConfig.Containerd.Opt }}"

[plugins.cri]
  stream_server_address = "127.0.0.1"
  stream_server_port = "10010"

{{- if .IsRunningInUserNS }}
  disable_cgroup = true
  disable_apparmor = true
  restrict_oom_score_adj = true
{{end}}

{{- if .NodeConfig.AgentConfig.PauseImage }}
  sandbox_image = "{{ .NodeConfig.AgentConfig.PauseImage }}"
{{end}}

{{- if not .NodeConfig.NoFlannel }}
[plugins.cri.cni]
  bin_dir = "{{ .NodeConfig.AgentConfig.CNIBinDir }}"
  conf_dir = "{{ .NodeConfig.AgentConfig.CNIConfDir }}"
{{end}}

[plugins.cri.containerd.runtimes.runc]
  # ---- changed from 'io.containerd.runc.v2' for GPU support
  runtime_type = "io.containerd.runtime.v1.linux"

# ---- added for GPU support
[plugins.linux]
  runtime = "nvidia-container-runtime"

{{ if .PrivateRegistryConfig }}
{{ if .PrivateRegistryConfig.Mirrors }}
[plugins.cri.registry.mirrors]{{end}}
{{range $k, $v := .PrivateRegistryConfig.Mirrors }}
[plugins.cri.registry.mirrors."{{$k}}"]
  endpoint = [{{range $i, $j := $v.Endpoints}}{{if $i}}, {{end}}{{printf "%q" .}}{{end}}]
{{end}}

{{range $k, $v := .PrivateRegistryConfig.Configs }}
{{ if $v.Auth }}
[plugins.cri.registry.configs."{{$k}}".auth]
  {{ if $v.Auth.Username }}username = "{{ $v.Auth.Username }}"{{end}}
  {{ if $v.Auth.Password }}password = "{{ $v.Auth.Password }}"{{end}}
  {{ if $v.Auth.Auth }}auth = "{{ $v.Auth.Auth }}"{{end}}
  {{ if $v.Auth.IdentityToken }}identitytoken = "{{ $v.Auth.IdentityToken }}"{{end}}
{{end}}
{{ if $v.TLS }}
[plugins.cri.registry.configs."{{$k}}".tls]
  {{ if $v.TLS.CAFile }}ca_file = "{{ $v.TLS.CAFile }}"{{end}}
  {{ if $v.TLS.CertFile }}cert_file = "{{ $v.TLS.CertFile }}"{{end}}
  {{ if $v.TLS.KeyFile }}key_file = "{{ $v.TLS.KeyFile }}"{{end}}
{{end}}
{{end}}
{{end}}
EOF
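Optionally, confirm the template is where K3s expects it before moving on:
ls -l /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl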
Still as root, make the K3s kubeconfig readable without sudo (kubectl will use it via KUBECONFIG below):
chmod 644 /etc/rancher/k3s/k3s.yaml
Exit out of root:
exit
Append KUBECONFIG to ~/.bashrc so kubectl uses the K3s kubeconfig in new shells:
echo 'export KUBECONFIG="/etc/rancher/k3s/k3s.yaml"' >> ~/.bashrc
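To pick it up in the current shell without opening a new terminal, export it directly as well:
export KUBECONFIG="/etc/rancher/k3s/k3s.yaml"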
Start the K3s service (installed earlier but not started because of INSTALL_K3S_SKIP_START):
sudo systemctl start k3s
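Once the service is up, kubectl should be able to reach the cluster, and the node should report Ready after a short wait:
kubectl get nodes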
Finally, install the Nvidia device plugin for Kubernetes, which exposes the node's GPU(s) to the scheduler:
helm repo add nvdp https://nvidia.github.io/k8s-device-plugin
helm repo update
helm install \
--version=0.9.0 \
--generate-name \
nvdp/nvidia-device-plugin
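To confirm the plugin is running and the GPU is advertised to Kubernetes (the plugin registers it as the nvidia.com/gpu resource), a couple of quick checks:
kubectl get pods -A | grep nvidia-device-plugin
kubectl describe node | grep -i nvidia.com/gpu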