Forked from bilalmughal/ec2_graviton_dl_bootstrap.sh
Last active
August 28, 2024 20:46
-
-
Save rromanchuk/fd7895d4546d639a0ad3bf7270c0ad73 to your computer and use it in GitHub Desktop.
This script automates the setup of Amazon EC2 Graviton (ARM-based) instances for deep learning tasks. It takes care of installing essential utilities, setting up the latest Nvidia drivers, the CUDA 12.2 toolkit and the cuDNN library, and building PyTorch from source. The step-by-step guide can be found here: https://jumpshare.com/blog/deep-learning-on-a…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Launches an EC2 instance with `aws ec2 run-instances`, passing a bootstrap
# script as user-data.
#
# Required environment variables: REGION, SECURITY_GROUPS, KEY_PAIR, SUBNET.
set -e # Exit on any error

# Check if required arguments are provided. The usage text goes to stderr so
# it is not mixed into anything a caller captures from stdout.
if [[ -z "$REGION" || -z "$SECURITY_GROUPS" || -z "$KEY_PAIR" || -z "$SUBNET" ]]; then
  {
    echo "Error: You must provide REGION, SECURITY_GROUPS, KEY_PAIR, and SUBNET as environment variables."
    echo "Example:"
    echo " export REGION=us-east-1"
    echo " export SECURITY_GROUPS=sg-12345678,sg-87654321"
    echo " export KEY_PAIR=my-key-pair"
    echo " export SUBNET=subnet-12345678"
  } >&2
  exit 1
fi
USER_DATA=$(cat <<'EOF'
#!/bin/bash
set -e # Exit on any error

# Paths and settings shared by the functions defined below.
CUDA_HOME=/usr/local/cuda
HOME_DIR=/home/ec2-user
SRC_DIR=$HOME_DIR/sources
# Command prefix used to run a command as ec2-user.
USER_EXEC="sudo -u ec2-user"
CPUS=$(nproc)
LOG_FILE="$HOME_DIR/install.log"

# Unless the first argument is --stdout, append all further stdout and stderr
# to the install log.
if [[ "$1" != "--stdout" ]]; then
  exec >>"$LOG_FILE" 2>&1
fi

# Create source directory and work from inside it
mkdir -p "$SRC_DIR"
pushd "$SRC_DIR"
# Install system utilities and updates, then register the CUDA paths.
#
# Globals:  CUDA_HOME (read), HOME_DIR (read)
# Outputs:  progress messages, plus the lines echoed back by tee, on stdout
install_utils() {
  echo "Installing utilities..."
  dnf -y update
  dnf -y groupinstall "Development Tools"
  dnf install -y openssl-devel cmake3 rust cargo amazon-efs-utils htop iotop \
    yasm nasm jq python3-pip python-devel cronie cronie-anacron
  echo "Success : Updates and developer tools installed."

  # Both variables are written with `export`: a plain assignment to
  # LD_LIBRARY_PATH stays local to the shell unless the variable was already
  # exported, so child processes would not see the CUDA libraries. The
  # ${VAR:+...} form avoids a trailing ':' when the variable was empty (an
  # empty entry makes the loader search the current directory).
  printf '%s\n' \
    "export PATH=${CUDA_HOME}/bin:\$PATH" \
    "export LD_LIBRARY_PATH=${CUDA_HOME}/lib64\${LD_LIBRARY_PATH:+:\$LD_LIBRARY_PATH}" \
    | sudo tee -a "$HOME_DIR/.bashrc"

  # Add the /usr/local library directories to the dynamic linker configuration.
  printf '%s\n' /usr/local/lib /usr/local/lib64 \
    | sudo tee /etc/ld.so.conf.d/usr-local-lib.conf
}
# Setup GPU, CUDA and CUDNN
#
# Downloads and runs the NVIDIA driver and CUDA runfile installers, copies the
# cuDNN headers and libraries into CUDA_HOME, then removes the downloads.
#
# Globals:  CUDA_HOME (read)
# Outputs:  progress message and tool output on stdout/stderr
setup_gpu() {
  # Versions live in one place so the URLs, file names and cleanup stay in sync.
  local driver_version="535.104.05"
  local cuda_version="12.2.2"
  local cudnn_version="8.9.4.25"

  local driver_run="NVIDIA-Linux-aarch64-${driver_version}.run"
  local cuda_run="cuda_${cuda_version}_${driver_version}_linux_sbsa.run"
  local cuda_prefix="/usr/local/cuda-${cuda_version%.*}"
  local cudnn_dir="cudnn-linux-sbsa-${cudnn_version}_cuda12-archive"
  local cudnn_tar="${cudnn_dir}.tar.xz"

  echo "Setting up GPU..."
  wget "https://us.download.nvidia.com/tesla/${driver_version}/${driver_run}"
  sh "$driver_run" --disable-nouveau --silent

  wget "https://developer.download.nvidia.com/compute/cuda/${cuda_version}/local_installers/${cuda_run}"
  sh "$cuda_run" --silent --override --toolkit --samples \
    "--toolkitpath=${cuda_prefix}" "--samplespath=${CUDA_HOME}" --no-opengl-libs

  wget "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/${cudnn_tar}"
  tar -xf "$cudnn_tar"
  # -P copies symbolic links as links instead of following them.
  cp -P "$cudnn_dir"/include/* "$CUDA_HOME/include/"
  cp -P "$cudnn_dir"/lib/* "$CUDA_HOME/lib64/"
  chmod a+r "$CUDA_HOME"/lib64/*
  ldconfig

  # Remove exactly what was downloaded or extracted above rather than globbing
  # on "cu*" / "NVIDIA*", which would also delete unrelated files.
  rm -fr -- "$driver_run" "$cuda_run" "$cudnn_dir" "$cudnn_tar"
}
# Install PyTorch from source
#
# Builds and installs ccache, installs the Python build prerequisites, then
# clones PyTorch (with submodules) and runs its setup.py install.
#
# Globals:  CPUS (read), USER_EXEC (read)
# Outputs:  progress message and tool output on stdout/stderr
install_pytorch() {
  local ccache_version="4.8.3"
  local ccache_dir="ccache-${ccache_version}"

  echo "Installing PyTorch..."

  wget "https://github.com/ccache/ccache/releases/download/v${ccache_version}/${ccache_dir}.tar.xz"
  tar -xf "${ccache_dir}.tar.xz"
  pushd "$ccache_dir"
  cmake .
  make -j "$CPUS"
  # Without the install step the freshly built ccache binary stays in the
  # build tree and is not on PATH for the PyTorch build that follows.
  make install
  popd

  dnf install -y numpy
  pip3 install typing-extensions

  git clone --recursive https://github.com/pytorch/pytorch.git
  pushd pytorch
  python3 setup.py install
  popd
  ldconfig

  # USER_EXEC holds a command prefix ("sudo -u ec2-user"), so it must be
  # word-split and is deliberately left unquoted.
  # shellcheck disable=SC2086
  $USER_EXEC pip3 install sympy filelock fsspec networkx
}
mount -o remount,size=15G /tmp/ | |
# Execute Functions | |
install_utils | |
setup_gpu | |
source $HOME_DIR/.bashrc | |
install_pytorch | |
# Cleanup | |
popd | |
rm -fr $SRC_DIR | |
# Test the installation | |
$USER_EXEC python3 -c "import torch; print('Congratulations, you are all set to go.' if torch.cuda.is_available() else 'Something went wrong. Please check if you missed any steps.')" | |
EOF | |
) | |
# AMI to launch. AMI IDs are region-specific while REGION is supplied by the
# caller, so the default can be overridden through the environment.
AMI_ID="${AMI_ID:-ami-0b9ce70cf1bc24fc3}"

# SECURITY_GROUPS may be comma- and/or whitespace-separated (the usage example
# uses commas). The AWS CLI expects each list member as its own argument, so
# turn commas into spaces and let word splitting build the array.
# shellcheck disable=SC2206
security_group_ids=(${SECURITY_GROUPS//,/ })

aws ec2 run-instances \
  --image-id "$AMI_ID" \
  --instance-type g5g.4xlarge \
  --key-name "$KEY_PAIR" \
  --subnet-id "$SUBNET" \
  --security-group-ids "${security_group_ids[@]}" \
  --region "$REGION" \
  --block-device-mappings '[{"DeviceName":"/dev/xvda","Ebs":{"VolumeSize":100,"VolumeType":"gp3"}}]' \
  --tag-specifications 'ResourceType=instance,Tags=[{Key=Name,Value=AMI-Builder}]' \
  --user-data "$USER_DATA"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Transcript of the same setup steps entered by hand in a shell session.
# Lines starting with '#' directly after a command show that command's output.
# NOTE(review): the trailing ` | |` on each line looks like residue from the
# web page's table layout rather than part of the commands — confirm before
# copying anything from here.
/bin/bash | |
whoami | |
# ec2-user | |
pwd | |
# /home/ec2-user | |
# Switch to a root shell for the remaining steps.
sudo su | |
# Paths and settings used by the commands below.
CUDA_HOME=/usr/local/cuda | |
HOME_DIR=/home/ec2-user | |
SRC_DIR=$HOME_DIR/sources | |
USER_EXEC="sudo -u ec2-user" | |
CPUS=$(nproc) | |
LOG_FILE="$HOME_DIR/install.log" | |
# Create the source directory and work from inside it.
mkdir -p $SRC_DIR | |
pushd $SRC_DIR | |
# System update plus build tooling and utilities.
dnf -y update | |
dnf -y groupinstall "Development Tools" | |
dnf install -y openssl-devel cmake3 rust cargo amazon-efs-utils htop iotop yasm nasm jq python3-pip python-devel cronie cronie-anacron | |
echo "Success : Updates and developer tools installed." | |
# Append the CUDA bin and lib64 directories to ec2-user's .bashrc. The \$ keeps
# $PATH / $LD_LIBRARY_PATH literal so they expand when .bashrc is sourced.
echo "PATH=$CUDA_HOME/bin:\$PATH" | sudo tee -a $HOME_DIR/.bashrc | |
echo "LD_LIBRARY_PATH=$CUDA_HOME/lib64:\$LD_LIBRARY_PATH" | sudo tee -a $HOME_DIR/.bashrc | |
# Write /usr/local/lib and /usr/local/lib64 to a dynamic linker config file
# (first tee overwrites the file, second appends).
echo "/usr/local/lib" | sudo tee /etc/ld.so.conf.d/usr-local-lib.conf | |
echo "/usr/local/lib64" | sudo tee -a /etc/ld.so.conf.d/usr-local-lib.conf | |
# Print .bashrc to inspect it.
# NOTE(review): everything after the `cat` line looks like the file's printed
# contents rather than commands to run — confirm.
cat .bashrc | |
# .bashrc | |
# Source global definitions | |
if [ -f /etc/bashrc ]; then | |
. /etc/bashrc | |
fi | |
# User specific environment | |
if ! [[ "$PATH" =~ "$HOME/.local/bin:$HOME/bin:" ]] | |
then | |
PATH="$HOME/.local/bin:$HOME/bin:$PATH" | |
fi | |
export PATH | |
# Uncomment the following line if you don't like systemctl's auto-paging feature: | |
# export SYSTEMD_PAGER= | |
# User specific aliases and functions | |
if [ -d ~/.bashrc.d ]; then | |
for rc in ~/.bashrc.d/*; do | |
if [ -f "$rc" ]; then | |
. "$rc" | |
fi | |
done | |
fi | |
unset rc | |
# The two assignments below add the CUDA bin and lib64 directories.
# NOTE(review): LD_LIBRARY_PATH is assigned without `export`, so child
# processes only see it if the variable is already exported — confirm intent.
PATH=/usr/local/cuda/bin:$PATH | |
LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH | |
# GPU setup: NVIDIA driver, CUDA toolkit, then cuDNN.
echo "Setting up GPU..." | |
# Download and run the NVIDIA driver installer (version 535.104.05, aarch64).
wget https://us.download.nvidia.com/tesla/535.104.05/NVIDIA-Linux-aarch64-535.104.05.run | |
sh NVIDIA-Linux-aarch64-535.104.05.run --disable-nouveau --silent | |
# Download and run the CUDA 12.2.2 runfile installer with the toolkit path set
# to /usr/local/cuda-12.2.
wget https://developer.download.nvidia.com/compute/cuda/12.2.2/local_installers/cuda_12.2.2_535.104.05_linux_sbsa.run | |
sh cuda_12.2.2_535.104.05_linux_sbsa.run --silent --override --toolkit --samples --toolkitpath=/usr/local/cuda-12.2 --samplespath=$CUDA_HOME --no-opengl-libs | |
# Download the cuDNN 8.9.4.25 archive and copy its headers and libraries into
# $CUDA_HOME; -P copies symbolic links as links.
wget https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-8.9.4.25_cuda12-archive.tar.xz | |
tar -xf cudnn-linux-sbsa-8.9.4.25_cuda12-archive.tar.xz | |
cp -P cudnn-linux-sbsa-8.9.4.25_cuda12-archive/include/* $CUDA_HOME/include/ | |
cp -P cudnn-linux-sbsa-8.9.4.25_cuda12-archive/lib/* $CUDA_HOME/lib64/ | |
# Make the copied libraries world-readable and refresh the linker cache.
chmod a+r $CUDA_HOME/lib64/* | |
ldconfig | |
# PyTorch from source: build ccache, install Python prerequisites, then clone
# and install PyTorch.
echo "Installing PyTorch..." | |
# Download and compile ccache 4.8.3.
# NOTE(review): no install step for ccache is visible after `make`, so the
# built binary appears to stay inside the build directory — confirm.
wget https://github.com/ccache/ccache/releases/download/v4.8.3/ccache-4.8.3.tar.xz | |
tar -xf ccache-4.8.3.tar.xz | |
pushd ccache-4.8.3 | |
cmake . | |
make -j $CPUS | |
popd | |
# Python packages installed before the PyTorch build.
dnf install -y numpy | |
pip3 install typing-extensions | |
# Clone PyTorch with its submodules and install it from the checkout.
git clone --recursive https://github.com/pytorch/pytorch.git | |
pushd pytorch | |
python3 setup.py install | |
popd | |
ldconfig | |
# Install additional Python packages as ec2-user.
sudo -u ec2-user pip3 install sympy filelock fsspec networkx | |
Author
rromanchuk
commented
Aug 28, 2024
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment