Created
August 17, 2024 10:04
-
-
Save craigphicks/db3063415796b33404c253ed3099527d to your computer and use it in GitHub Desktop.
Set up an AWS Linux AMI with NVIDIA-Linux-x86_64 drivers and nvidia-container-toolkit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Step-by-Step Installation Guide
#
# Installs build prerequisites for the NVIDIA driver on an AWS Linux instance
# and reboots.
#
# NOTE: `sudo reboot` ends the current session, so nothing after a reboot runs
# in the same invocation. Reconnect once the instance is back up and continue
# with the next step by hand.

# Abort on the first failing command, on unset variables, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Step 0: Remove existing NVIDIA drivers and CUDA installations
sudo yum erase -y nvidia cuda

# Step 1: Update the system and install dependencies
sudo yum update -y
sudo yum groupinstall -y "Development Tools"

# The devel/headers packages are selected for the kernel that is running
# right now.
# NOTE(review): if `yum update` pulled in a newer kernel, these packages still
# match the old (running) one; verify they match `uname -r` after the reboot.
kernel_release="$(uname -r)"
sudo yum install -y \
  "kernel-devel-${kernel_release}" \
  "kernel-headers-${kernel_release}" \
  dkms

# Step X: Blacklist the nouveau driver === PROBABLY NOT NEEDED, Step 2.5 is needed instead
# sudo bash -c 'echo "blacklist nouveau" > /etc/modprobe.d/blacklist-nouveau.conf'
# sudo bash -c 'echo "options nouveau modeset=0" >> /etc/modprobe.d/blacklist-nouveau.conf'
# sudo dracut --force

sudo reboot
# Step 2: Download and install the NVIDIA driver
# The version is defined once so the file name and the URLs cannot drift apart.
driver_version="550.90.07"
installer="NVIDIA-Linux-x86_64-${driver_version}.run"

# Fetch the installer exactly once: try the private S3 mirror first and fall
# back to NVIDIA's public download server when the copy fails (bucket not
# accessible, aws CLI missing, ...). `wget -O` writes to the expected name
# instead of creating a numbered duplicate next to an existing file.
if ! aws s3 cp "s3://craigaws.bucket.1/${installer}" .; then
  wget -O "${installer}" \
    "https://us.download.nvidia.com/XFree86/Linux-x86_64/${driver_version}/${installer}"
fi
chmod +x "${installer}"

# Step 2.5. bug fix c.f. https://github.com/amazonlinux/amazon-linux-2023/issues/538#issuecomment-1793271091
# -y keeps the install from blocking on a confirmation prompt.
sudo dnf install -y kernel-modules-extra

# --dkms registers the kernel module with DKMS; -s runs the installer silently.
sudo "./${installer}" --dkms -s

# These warning messages are expected:
# WARNING: nvidia-installer was forced to guess the X library path '/usr/lib64' and X module path '/usr/lib64/xorg/modules'; these paths were not queryable from the system. If X fails to find the
# NVIDIA X driver module, please install the `pkg-config` utility and the X.Org SDK/development package for your distribution and reinstall the driver.
# WARNING: This NVIDIA driver package includes Vulkan components, but no Vulkan ICD loader was detected on this system. The NVIDIA Vulkan ICD will not function without the loader. Most distributions
# package the Vulkan loader; try installing the "vulkan-loader", "vulkan-icd-loader", or "libvulkan1" package.
# Step 3: Reboot the instance
# The reboot ends the current session; reconnect before continuing.
sudo reboot

# Step 4: Verify the installation
# After reboot, you can run the following command to verify the installation.
# It should list the GPU(s) together with the installed driver version.
nvidia-smi
# Step 5: Install Docker
sudo yum update -y
sudo yum install -y docker
sudo systemctl start docker

# Let ec2-user talk to the Docker daemon without sudo. The new group
# membership only applies to sessions started after this point (log out and
# back in, or run `newgrp docker`).
sudo usermod -a -G docker ec2-user

# Sanity check. Run through sudo because the current session does not have the
# docker group yet; a plain `docker info` would fail with a permission error
# on the daemon socket and abort the script.
sudo docker info
# Step 6: Install NVIDIA Container Toolkit
## Set up the package repository and GPG key:
# --fail makes curl exit non-zero on an HTTP error instead of saving an error
# page as the repo file; writing with -o (rather than piping into tee) means a
# failed download is not hidden behind the exit status of the last pipeline
# stage.
sudo curl --fail --silent --show-error --location \
  --output /etc/yum.repos.d/nvidia-container-toolkit.repo \
  https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo
sudo yum clean expire-cache
sudo yum update -y

## Install the NVIDIA Container Toolkit:
sudo yum install -y nvidia-container-toolkit

## Restart the Docker daemon to complete the installation:
sudo systemctl restart docker

## Run a test container to verify that the installation was successful:
# Run through sudo: a session started before the user was added to the docker
# group cannot reach the daemon socket without it.
sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base-ubuntu20.04 nvidia-smi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment