Created
October 17, 2019 10:09
-
-
Save HenriTEL/f14a2108ce428543e322461d6427ef57 to your computer and use it in GitHub Desktop.
A custom driver installer for gke ubuntu based images that includes nvidia-docker.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DaemonSet that installs the NVIDIA driver and nvidia-docker on GKE nodes.
# The install script is delivered through the ubuntu-nvidia-install-script
# ConfigMap and mounted into the installer init container.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-driver-installer
  namespace: kube-system
  labels:
    k8s-app: nvidia-driver-installer
spec:
  selector:
    matchLabels:
      k8s-app: nvidia-driver-installer
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-driver-installer
        k8s-app: nvidia-driver-installer
    spec:
      # Schedule only onto nodes that carry the GKE accelerator label.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: cloud.google.com/gke-accelerator
                operator: Exists
      # A key-less "Exists" toleration matches every taint.
      tolerations:
      - operator: "Exists"
      volumes:
      - name: dev
        hostPath:
          path: /dev
      - name: boot
        hostPath:
          path: /boot
      # The whole host filesystem; mounted at /root in the installer below.
      - name: root-mount
        hostPath:
          path: /
      # Exposes the ConfigMap key ubuntu-nvidia-install.sh as the file
      # "ubuntu-nvidia-install".
      - name: install-script
        configMap:
          name: ubuntu-nvidia-install-script
          items:
          - key: ubuntu-nvidia-install.sh
            path: ubuntu-nvidia-install
          # Octal file mode: rwxr--r--, so the script is executable.
          defaultMode: 0744
      # Share the host PID namespace; docker-reloader targets host PID 1.
      hostPID: true
      initContainers:
      # NOTE(review): no command is set here, so the image's own entrypoint
      # is presumably what invokes /usr/local/bin/ubuntu-nvidia-install -
      # confirm against the gke-nvidia-installer image.
      - name: nvidia-driver-installer
        image: gke-nvidia-installer:fixed
        resources:
          requests:
            cpu: 0.15
        securityContext:
          privileged: true
        volumeMounts:
        - name: boot
          mountPath: /boot
        - name: dev
          mountPath: /dev
        - name: root-mount
          mountPath: /root
        - name: install-script
          mountPath: /usr/local/bin
      # Enters the mount, UTS, network and IPC namespaces of PID 1 and runs
      # "service docker reload" there.
      - name: docker-reloader
        image: debian:10.1-slim
        command: [nsenter, -t, '1', -m, -u, -n, -i, service, docker, reload]
        securityContext:
          privileged: true
      # The only regular container; all work happens in the init containers.
      containers:
      - name: pause
        image: gcr.io/google-containers/pause:2.0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Kustomization that packages the install script into a ConfigMap and deploys
# it together with the DaemonSet manifest.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kube-system
generatorOptions:
  # Keep the generated ConfigMap name stable (no content-hash suffix) so it
  # can be referenced by its plain name, ubuntu-nvidia-install-script.
  disableNameSuffixHash: true
configMapGenerator:
- name: ubuntu-nvidia-install-script
  files:
  - ubuntu-nvidia-install.sh
resources:
- daemonset.yml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# CLOUD_IMG: This file was created/modified by the Cloud Image build process
#
# This script is run by the nvidia-container-first-boot script.
# Its purpose is to install nvidia drivers.
# Copyright 2018 Canonical Ltd.
#
# These programs are free software; you can redistribute and/or modify
# them under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# These programs are distributed in the hope that they will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License with
# your Ubuntu system, in /usr/share/common-licenses/GPL, or with the
# livecd-rootfs source package as the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA.
# Abort on the first command that fails outside of a condition.
set -e

# Directory that is added to PATH / LD_LIBRARY_PATH and the host ld cache.
readonly NVIDIA_INSTALL_DIR="/opt/nvidia"
# Prefix under which host paths (etc/ld.so.conf, home/kubernetes/bin) are
# addressed.
readonly ROOT_MOUNT_DIR="/root"
# Distribution path component of the nvidia-docker apt repository URLs.
readonly LINUX_FLAVOR="ubuntu18.04"
# Reports whether an nvidia kernel module is already present.
# Arguments: $1 - kernel module directory, e.g. /lib/modules/<kernel version>
# Returns:   0 if $1/nvidia-*/nvidia.ko exists, 1 otherwise
nvidia_module_installed() {
  # The glob has to stay outside the quotes so the shell expands it. When
  # nothing matches, the literal pattern is kept and the -e test fails.
  for nvidia_module_path in "$1"/nvidia-*/nvidia.ko; do
    if [ -e "$nvidia_module_path" ]; then
      return 0
    fi
  done
  return 1
}

# Downloads the nvidia-driver-gke package matching the running kernel from the
# ubuntu_nvidia_packages bucket and installs it with dpkg.
#
# The driver version is resolved in this order:
#   1. $NVIDIA_DRIVER_VERSION, when set and non-empty;
#   2. /var/lib/nvidia/shipped-nvidia-version, when the running kernel equals
#      /var/lib/nvidia/shipped-kernel-version (nothing is installed at all if
#      the module is already present for that kernel);
#   3. the "current-driver" object published for this kernel and the variant
#      read from /var/lib/nvidia/shipped-gke-variant.
#
# Returns: 0 on success or when the installation is skipped; the exit status
#          of the failed apt-get step when the package could not be installed.
configure_nvidia_installation_dirs() {
  echo "Configuring installation directories..."
  tmp_deb_file=/var/cache/nvidia-driver-gke.deb
  tmp_driver_version_file=/var/cache/nvidia-driver-version
  kvers="$(uname -r)"
  rm --force "$tmp_deb_file" "$tmp_driver_version_file"

  # Check to see if the nvidia version to install has been overridden
  if [ -n "${NVIDIA_DRIVER_VERSION:-}" ]; then
    nvidia_version="$NVIDIA_DRIVER_VERSION"
  # Check to see if the nvidia version to install will change due to kernel
  # having been upgraded/downgraded
  elif [ "$(cat /var/lib/nvidia/shipped-kernel-version || true)" = "$kvers" ]; then
    # FIXME: nvidia module dir is versioned and we probably want it not to be
    if nvidia_module_installed "/lib/modules/$kvers"; then
      echo "Configuring installation directories... SKIPPED."
      return 0
    fi
    nvidia_version="$(cat /var/lib/nvidia/shipped-nvidia-version)"
  else
    # Download the current nvidia driver version from object storage.
    # The version will change based on the image variant
    # (currently d1703-0 or d1809-0) and we can read this variant from
    # /var/lib/nvidia/shipped-gke-variant
    gke_variant="$(cat /var/lib/nvidia/shipped-gke-variant)"
    wget "https://www.googleapis.com/storage/v1/b/ubuntu_nvidia_packages/o/current-driver-$kvers-$gke_variant-amd64?alt=media" -O "$tmp_driver_version_file"
    nvidia_version="$(cat "$tmp_driver_version_file")"
  fi

  gs_file="nvidia-driver-gke_$kvers-${nvidia_version}_amd64.deb"
  wget "https://www.googleapis.com/storage/v1/b/ubuntu_nvidia_packages/o/$gs_file?alt=media" -O "$tmp_deb_file"

  # Installing kernel modules triggers the kernel, but we don't want to
  # pointlessly rebuild initramfs (which we shouldn't actually have anyway,
  # that's a bug) or trigger a grub update. The latter fails due to
  # grub-probe not being able to identify the rootfs, and skipping these
  # hooks will speed up the driver package installation.
  dpkg-divert --rename --local \
    --divert /etc/kernel/postinst.d/initramfs-tools.nvidia-divert \
    --add /etc/kernel/postinst.d/initramfs-tools
  dpkg-divert --rename --local \
    --divert /etc/kernel/postinst.d/zz-update-grub.nvidia-divert \
    --add /etc/kernel/postinst.d/zz-update-grub

  # Record a failure instead of letting `set -e` abort right here, so the
  # diversions and temporary files below are always cleaned up.
  install_status=0
  if ! DEBIAN_FRONTEND=noninteractive dpkg --install "$tmp_deb_file"; then
    # could dpkg have failed because of a package dependency?
    apt-get update || install_status=$?
    if [ "$install_status" -eq 0 ]; then
      DEBIAN_FRONTEND=noninteractive apt-get --fix-broken --assume-yes install \
        || install_status=$?
    fi
  fi

  dpkg-divert --rename --local --remove /etc/kernel/postinst.d/initramfs-tools
  dpkg-divert --rename --local --remove /etc/kernel/postinst.d/zz-update-grub
  rm --force "$tmp_deb_file" "$tmp_driver_version_file"

  if [ "$install_status" -ne 0 ]; then
    echo "Configuring installation directories... FAILED." >&2
    return "$install_status"
  fi
  echo "Configuring installation directories... DONE."
}
# Prints the docker daemon configuration that registers the "nvidia" runtime
# and makes it the default runtime.
nvidia_docker_daemon_config() {
  # Quoted delimiter: the JSON is emitted literally, with no expansion.
  cat <<'EOF'
{
    "default-runtime": "nvidia",
    "runtimes": {
        "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}
EOF
}

# Adds the nvidia-container-runtime and nvidia-docker apt repositories for
# $LINUX_FLAVOR, installs both packages and writes /etc/docker/daemon.json.
#
# NOTE(review): daemon.json is overwritten wholesale, so any settings already
# present in that file are lost - confirm this is acceptable for the node
# image in use.
install_nvidia_docker() {
  wget -qO- "https://nvidia.github.io/nvidia-container-runtime/gpgkey" | apt-key add -
  wget -qO- "https://nvidia.github.io/nvidia-docker/gpgkey" | apt-key add -
  wget "https://nvidia.github.io/nvidia-container-runtime/${LINUX_FLAVOR}/nvidia-container-runtime.list" \
    -O "/etc/apt/sources.list.d/nvidia-container-runtime.list"
  wget "https://nvidia.github.io/nvidia-docker/${LINUX_FLAVOR}/nvidia-docker.list" \
    -O "/etc/apt/sources.list.d/nvidia-docker.list"
  apt-get update
  DEBIAN_FRONTEND=noninteractive apt-get --assume-yes install nvidia-container-runtime nvidia-docker2
  nvidia_docker_daemon_config > /etc/docker/daemon.json
}
# Checks the installed driver by running nvidia-smi.
# Prepends $NVIDIA_INSTALL_DIR/bin to PATH and $NVIDIA_INSTALL_DIR/lib64 to
# LD_LIBRARY_PATH (both exported), and runs the create-uvm-dev-node helper
# first when /dev/nvidia-uvm is not yet a character device.
verify_nvidia_installation() {
  echo "Verifying Nvidia installation..."
  export PATH="${NVIDIA_INSTALL_DIR}/bin:${PATH}"
  # ${VAR:+...} only appends ":$LD_LIBRARY_PATH" when it is already set, which
  # avoids a trailing empty entry.
  export LD_LIBRARY_PATH="${NVIDIA_INSTALL_DIR}/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
  if [ ! -c "/dev/nvidia-uvm" ]; then
    # Quoted so an install dir containing whitespace is not word-split.
    "${NVIDIA_INSTALL_DIR}/sbin/create-uvm-dev-node"
  fi
  nvidia-smi
  echo "Verifying Nvidia installation... DONE."
}
# Registers $NVIDIA_INSTALL_DIR/lib64 in the ld.so.conf found under
# $ROOT_MOUNT_DIR and rebuilds the linker cache with that directory as root.
update_host_ld_cache() {
  echo "Updating host's ld cache..."
  ld_conf_file="${ROOT_MOUNT_DIR}/etc/ld.so.conf"
  nvidia_lib_dir="${NVIDIA_INSTALL_DIR}/lib64"
  # Append only when the exact line is missing, so repeated runs do not pile
  # up duplicate entries. A missing file counts as "not present".
  if ! grep -q -x -F -- "$nvidia_lib_dir" "$ld_conf_file" 2>/dev/null; then
    echo "$nvidia_lib_dir" >> "$ld_conf_file"
  fi
  ldconfig -r "${ROOT_MOUNT_DIR}"
  echo "Updating host's ld cache... DONE."
}
# Replaces this container's /etc with a symlink to the etc directory under
# the root mount, keeping the original directory as /etc.bak.
# The three files are unmounted first; presumably they are mounted over /etc
# by the container runtime, which would otherwise block the move - confirm.
undo_docker_redirects() {
  umount /etc/hosts /etc/resolv.conf /etc/hostname
  mv /etc /etc.bak
  # Use the shared root-mount setting (default /root) instead of a second
  # hard-coded copy of the path.
  ln --symbolic "${ROOT_MOUNT_DIR:-/root}/etc" /etc
}
# Creates (or replaces) the symlink home/kubernetes/bin/nvidia under
# $ROOT_MOUNT_DIR, pointing at $NVIDIA_INSTALL_DIR.
create_nvidia_symlink() {
  # --no-target-directory: replace an existing link rather than creating a
  # new link inside the directory it points to.
  ln --symbolic --no-target-directory --force \
    "${NVIDIA_INSTALL_DIR}" \
    "${ROOT_MOUNT_DIR}/home/kubernetes/bin/nvidia"
}
# Entry point: runs every installation step in order. The /etc redirect is
# undone first, then the driver package and nvidia-docker are installed, the
# ld cache is updated, the installation is verified and the symlink is created.
main() {
  echo "Installing nvidia drivers..."
  undo_docker_redirects
  configure_nvidia_installation_dirs
  install_nvidia_docker
  update_host_ld_cache
  verify_nvidia_installation
  create_nvidia_symlink
}
# Run the installer, forwarding the script's arguments unchanged.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ubuntu-nvidia-install.sh
is based on https://github.com/GoogleCloudPlatform/container-engine-accelerators/blob/master/nvidia-driver-installer/ubuntu/entrypoint.sh