Created
October 27, 2020 06:45
-
-
Save markrexwinkel/3607c7073f6f94f2f05d51ef48c04e32 to your computer and use it in GitHub Desktop.
k3s docker container gpu support
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # containerd configuration template: TOML generated through Go template actions ({{ ... }}). | |
| # Values come from the .NodeConfig, .IsRunningInUserNS and .PrivateRegistryConfig fields referenced below. | |
| [plugins.opt] | |
| path = "{{ .NodeConfig.Containerd.Opt }}" | |
| # CRI plugin: the streaming server is bound to 127.0.0.1:10010. | |
| [plugins.cri] | |
| stream_server_address = "127.0.0.1" | |
| stream_server_port = "10010" | |
| # The three settings below are only emitted when .IsRunningInUserNS is true. | |
| {{- if .IsRunningInUserNS }} | |
| disable_cgroup = true | |
| disable_apparmor = true | |
| restrict_oom_score_adj = true | |
| {{end}} | |
| # sandbox_image is only emitted when .NodeConfig.AgentConfig.PauseImage is non-empty. | |
| {{- if .NodeConfig.AgentConfig.PauseImage }} | |
| sandbox_image = "{{ .NodeConfig.AgentConfig.PauseImage }}" | |
| {{end}} | |
| # The CNI bin/conf directories are only emitted when .NodeConfig.NoFlannel is false. | |
| {{- if not .NodeConfig.NoFlannel }} | |
| [plugins.cri.cni] | |
| bin_dir = "{{ .NodeConfig.AgentConfig.CNIBinDir }}" | |
| conf_dir = "{{ .NodeConfig.AgentConfig.CNIConfDir }}" | |
| {{end}} | |
| [plugins.cri.containerd.runtimes.runc] | |
| # ---- changed from 'io.containerd.runc.v2' for GPU support | |
| runtime_type = "io.containerd.runtime.v1.linux" | |
| # ---- added for GPU support | |
| [plugins.linux] | |
| runtime = "nvidia-container-runtime" | |
| # Private registry settings: everything from here to the end is emitted only when .PrivateRegistryConfig is set. | |
| {{ if .PrivateRegistryConfig }} | |
| {{ if .PrivateRegistryConfig.Mirrors }} | |
| [plugins.cri.registry.mirrors]{{end}} | |
| # One mirrors table per entry in .PrivateRegistryConfig.Mirrors; endpoints are written as a quoted, comma-separated list. | |
| {{range $k, $v := .PrivateRegistryConfig.Mirrors }} | |
| [plugins.cri.registry.mirrors."{{$k}}"] | |
| endpoint = [{{range $i, $j := $v.Endpoints}}{{if $i}}, {{end}}{{printf "%q" .}}{{end}}] | |
| {{end}} | |
| # Per-registry auth and tls tables; each key is written only when its value is non-empty. | |
| {{range $k, $v := .PrivateRegistryConfig.Configs }} | |
| {{ if $v.Auth }} | |
| [plugins.cri.registry.configs."{{$k}}".auth] | |
| {{ if $v.Auth.Username }}username = "{{ $v.Auth.Username }}"{{end}} | |
| {{ if $v.Auth.Password }}password = "{{ $v.Auth.Password }}"{{end}} | |
| {{ if $v.Auth.Auth }}auth = "{{ $v.Auth.Auth }}"{{end}} | |
| {{ if $v.Auth.IdentityToken }}identitytoken = "{{ $v.Auth.IdentityToken }}"{{end}} | |
| {{end}} | |
| {{ if $v.TLS }} | |
| [plugins.cri.registry.configs."{{$k}}".tls] | |
| {{ if $v.TLS.CAFile }}ca_file = "{{ $v.TLS.CAFile }}"{{end}} | |
| {{ if $v.TLS.CertFile }}cert_file = "{{ $v.TLS.CertFile }}"{{end}} | |
| {{ if $v.TLS.KeyFile }}key_file = "{{ $v.TLS.KeyFile }}"{{end}} | |
| {{end}} | |
| {{end}} | |
| {{end}} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Pod named cuda-vector-add that requests one nvidia.com/gpu resource. | |
| # NOTE(review): presumably a smoke test for GPU scheduling, judging by the image name - confirm. | |
| apiVersion: v1 | |
| kind: Pod | |
| metadata: | |
| name: cuda-vector-add | |
| spec: | |
| # The container is restarted only when it exits with a failure. | |
| restartPolicy: OnFailure | |
| containers: | |
| - name: cuda-vector-add | |
| image: "k8s.gcr.io/cuda-vector-add:v0.1" | |
| resources: | |
| limits: | |
| # Limit of exactly one GPU for this container. | |
| nvidia.com/gpu: 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Stage 1 (base): unpack the build output into /image and add the CA bundle to it. | |
| FROM ubuntu:18.04 AS base | |
| RUN apt-get update -y && apt-get install -y ca-certificates | |
| # ADD (rather than COPY) so the local tarball is extracted into /image. | |
| ADD build/out/data.tar.gz /image | |
| RUN mkdir -p /image/etc/ssl/certs /image/run /image/var/run /image/tmp /image/lib/modules /image/lib/firmware && \ | |
| cp /etc/ssl/certs/ca-certificates.crt /image/etc/ssl/certs/ca-certificates.crt | |
| # Absolute path so this step does not depend on the implicit WORKDIR. | |
| RUN cd /image/bin && \ | |
| rm -f k3s && \ | |
| ln -s k3s-server k3s | |
| # Stage 2 (final image): Ubuntu with the NVIDIA driver and container runtime, plus the /image tree from stage 1. | |
| FROM ubuntu:18.04 | |
| # Make debconf non-interactive so package installs never prompt during the build. | |
| RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections | |
| RUN apt-get update -y && apt-get -y install gnupg2 curl | |
| # Fetch the CUDA repository key and package lists over HTTPS rather than plain HTTP. | |
| # NOTE(review): NVIDIA has rotated its CUDA repository signing key; confirm 7fa2af80 still signs this repo before building. | |
| RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | |
| RUN sh -c 'echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list' | |
| # -f makes curl fail on HTTP errors instead of passing an error page on as a key or source list. | |
| RUN curl -fsSL https://nvidia.github.io/nvidia-container-runtime/gpgkey | apt-key add - | |
| RUN curl -fsSL https://nvidia.github.io/nvidia-container-runtime/ubuntu18.04/nvidia-container-runtime.list -o /etc/apt/sources.list.d/nvidia-container-runtime.list | |
| # Keep update and install in one layer so a cached, stale package index is never reused for the install. | |
| RUN apt-get update -y && \ | |
| apt-get -y install cuda-drivers nvidia-container-runtime | |
| COPY --from=base /image / | |
| RUN mkdir -p /etc && \ | |
| echo 'hosts: files dns' > /etc/nsswitch.conf | |
| RUN chmod 1777 /tmp | |
| # Custom containerd config template that selects nvidia-container-runtime. | |
| RUN mkdir -p /var/lib/rancher/k3s/agent/etc/containerd/ | |
| COPY config.toml.tmpl /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl | |
| # Manifest placed in the k3s server manifests directory. | |
| RUN mkdir -p /var/lib/rancher/k3s/server/manifests | |
| COPY gpu.yaml /var/lib/rancher/k3s/server/manifests/gpu.yaml | |
| VOLUME /var/lib/kubelet | |
| VOLUME /var/lib/rancher/k3s | |
| VOLUME /var/lib/cni | |
| VOLUME /var/log | |
| ENV PATH="$PATH:/bin/aux" | |
| # Runs "k3s agent" by default; the CMD can be overridden at run time. | |
| ENTRYPOINT ["/bin/k3s"] | |
| CMD ["agent"] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # DaemonSet in kube-system that runs the nvidia/k8s-device-plugin container. | |
| apiVersion: apps/v1 | |
| kind: DaemonSet | |
| metadata: | |
| name: nvidia-device-plugin-daemonset | |
| namespace: kube-system | |
| spec: | |
| # The selector label must match the pod template label set further down. | |
| selector: | |
| matchLabels: | |
| name: nvidia-device-plugin-ds | |
| template: | |
| metadata: | |
| # Mark this pod as a critical add-on; when enabled, the critical add-on scheduler | |
| # reserves resources for critical add-on pods so that they can be rescheduled after | |
| # a failure. This annotation works in tandem with the toleration below. | |
| annotations: | |
| scheduler.alpha.kubernetes.io/critical-pod: "" | |
| labels: | |
| name: nvidia-device-plugin-ds | |
| spec: | |
| tolerations: | |
| # Allow this pod to be rescheduled while the node is in "critical add-ons only" mode. | |
| # This, along with the annotation above marks this pod as a critical add-on. | |
| - key: CriticalAddonsOnly | |
| operator: Exists | |
| containers: | |
| - env: | |
| # NOTE(review): presumably turns off the plugin's XID-based health checks - confirm against the plugin docs. | |
| - name: DP_DISABLE_HEALTHCHECKS | |
| value: xids | |
| image: nvidia/k8s-device-plugin:1.11 | |
| name: nvidia-device-plugin-ctr | |
| # All capabilities are dropped, but privilege escalation is left allowed. | |
| securityContext: | |
| allowPrivilegeEscalation: true | |
| capabilities: | |
| drop: ["ALL"] | |
| # The host's kubelet device-plugins directory is mounted at the same path inside the container. | |
| volumeMounts: | |
| - name: device-plugin | |
| mountPath: /var/lib/kubelet/device-plugins | |
| volumes: | |
| - name: device-plugin | |
| hostPath: | |
| path: /var/lib/kubelet/device-plugins |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Steps I took to get the GPU working in a k3s container:
Run the container with `docker run` using the `--gpus all` flag.