Last active
May 16, 2020 14:59
-
-
Save innovia/968bd6709d83a8241a80d42a2922541b to your computer and use it in GitHub Desktop.
Download NVIDIA drivers and CUDA libraries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Installs the NVIDIA 390.46 driver and the CUDA 9.1 toolkit (plus patches 1-3)
# on a Debian host, verifies the GPU, creates the nvidia-uvm device node and
# restarts kubelet.
#
# Requires bash (arrays are used) and enough privileges to run apt-get,
# modprobe, mknod and systemctl.
#
# This script is built on kops bootstrap
# https://github.com/dcwangmit01/kops/blob/ef958a7f870eb3dc20981617859b8ad69057bb2a/hooks/nvidia-bootstrap/image/run.sh

# Work inside a dedicated directory; abort if we cannot enter it so that the
# installers are never downloaded into (or executed from) an unexpected place.
mkdir -p nvidia
cd nvidia || exit 1

# Get the NVIDIA driver
wget http://us.download.nvidia.com/tesla/390.46/NVIDIA-Linux-x86_64-390.46.run
chmod a+x NVIDIA-Linux-x86_64-390.46.run

# Prepare for driver installation using GCC7
apt-get update
apt-get --no-upgrade -y install gcc "linux-headers-$(uname -r)"

echo "Installing gcc-7 on host machine"

# Temporarily add the debian "buster" repo where gcc-7 lives.
# NOTE: this sed expression stops copying at the first line containing
# "buster", i.e. it drops that line and everything after it. It is used here
# to clear a leftover "buster" line appended by a previous run.
sed -n '/buster/q;p' -i /etc/apt/sources.list
echo "deb http://deb.debian.org/debian buster main" >> /etc/apt/sources.list

# Install gcc-7
apt-get update
apt-get -y install "linux-headers-$(uname -r)"
DEBIAN_FRONTEND=noninteractive apt-get -t buster --no-upgrade -y install gcc-7

# Remove the debian "buster" repo line that was added above
sed -n '/buster/q;p' -i /etc/apt/sources.list
apt-get update

# Unload open-source nouveau driver if it exists
# The nvidia drivers won't install otherwise
# "g3" instances in particular have this module auto-loaded
modprobe -r nouveau

# Download CUDA 9.1 toolkit and patches
cuda_files=(
  "https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_387.26_linux"
  "https://developer.nvidia.com/compute/cuda/9.1/Prod/patches/1/cuda_9.1.85.1_linux"
  "https://developer.nvidia.com/compute/cuda/9.1/Prod/patches/2/cuda_9.1.85.2_linux"
  "https://developer.nvidia.com/compute/cuda/9.1/Prod/patches/3/cuda_9.1.85.3_linux"
)

for url in "${cuda_files[@]}"; do
  curl -LO "$url"
done

chmod a+x cuda*

echo "Installing NVIDIA Driver"
CC=/usr/bin/gcc-7 ./NVIDIA-Linux-x86_64-390.46.run --accept-license --silent

echo "Install CUDA9.1 toolkit"
./cuda_9.1.85_387.26_linux --toolkit --silent --verbose

echo "Install CUDA 9.1 patch 1"
./cuda_9.1.85.1_linux --accept-eula --silent

echo "Install CUDA 9.1 patch 2"
./cuda_9.1.85.2_linux --accept-eula --silent

echo "Install CUDA 9.1 patch 3"
./cuda_9.1.85.3_linux --accept-eula --silent

echo "verify GPU"
# Use an explicit if: `cmd || echo ... && exit 1` groups as
# `(cmd || echo ...) && exit 1`, which exits even when nvidia-smi succeeds.
if ! nvidia-smi --list-gpus; then
  echo "failed to run nvidia-smi" >&2
  exit 1
fi

# Configure GPU Clock
# Common configurations
nvidia-smi -pm 1
nvidia-smi --auto-boost-default=0
nvidia-smi --auto-boost-permission=0

apt-get install -y jq

# The following custom configuration will be added to the userdata using kops edit ig
# Custom configurations per class of nvidia video card
# case "$AWS_INSTANCE_CLASS" in
# "g2" | "g3")
# chroot ${ROOTFS_DIR} nvidia-smi -ac 2505,1177
# ;;
# "p2")
# chroot ${ROOTFS_DIR} nvidia-smi -ac 2505,875
# chroot ${ROOTFS_DIR} nvidia-smi -acp 0
# ;;
# "p3")
# chroot ${ROOTFS_DIR} nvidia-smi -ac 877,1530
# chroot ${ROOTFS_DIR} nvidia-smi -acp 0
# ;;
# *)
# ;;
# esac

# Ensure that the device node exists
/sbin/modprobe nvidia-uvm
if ! test -e /dev/nvidia-uvm; then
  # Find out the major device number used by the nvidia-uvm driver.
  # Match the device name exactly so that similarly named entries in
  # /proc/devices cannot yield more than one number.
  uvm_major=$(awk '$2 == "nvidia-uvm" {print $1; exit}' /proc/devices)
  if [[ -n "$uvm_major" ]]; then
    mknod -m 666 /dev/nvidia-uvm c "$uvm_major" 0
  else
    echo "nvidia-uvm not found in /proc/devices; cannot create /dev/nvidia-uvm" >&2
  fi
fi

# Restart Kubelet
echo "Restarting Kubelet"
systemctl restart kubelet.service

# Cleanup kubelet config (labels taints etc) for enabling this image to run on any machine
rm -rf /var/lib/kubelet
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for the script! You forgot to unquote on line 21, and I had to run
/sbin/modprobe nvidia-uvm
before line 101 to make it work.