Skip to content

Instantly share code, notes, and snippets.

@0x0L
Last active July 7, 2023 07:59
Show Gist options
  • Save 0x0L/ba7fce19598293dcc31df74fab4c753e to your computer and use it in GitHub Desktop.
Save 0x0L/ba7fce19598293dcc31df74fab4c753e to your computer and use it in GitHub Desktop.
Setup P3 EC2 Rocky 9
# Base image and hardware these notes were written against:
#   image list: https://rockylinux.org/cloud-images/
#   AMI:        0951577d22b8e8dd4 (rocky 9.1)
#   instance:   p3.2xlarge
#
# Step 1 - NVIDIA driver: enable EPEL and CRB, register NVIDIA's CUDA
# repository for RHEL 9, install the nvidia-driver module stream, reboot.
dnf install --assumeyes epel-release
crb enable
dnf config-manager \
  --add-repo=https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
dnf module install --assumeyes nvidia-driver:latest
# The remaining steps are meant to be run once the machine is back up.
reboot
# Run at each startup: write 0 into the numa_node attribute of every PCI
# device listed under sysfs. tee also echoes each written value to stdout.
# NOTE(review): presumably this silences TensorFlow's warning about a
# negative NUMA node read from sysfs - confirm.
for pci_dev in /sys/bus/pci/devices/*; do
  printf '%s\n' 0 | tee -a "${pci_dev}/numa_node"
done
# Docker stuff
# Docker CE from Docker's CentOS repository, enabled and started right away.
dnf config-manager \
  --add-repo=https://download.docker.com/linux/centos/docker-ce.repo
dnf install --assumeyes docker-ce
systemctl enable --now docker

# Add the "rocky" user to the docker and wheel groups.
for grp in docker wheel; do
  usermod --append --groups "$grp" rocky
done

# NVIDIA container toolkit: save the repo definition (tee also prints it),
# install the toolkit, register it as a Docker runtime, and restart Docker
# so the new runtime configuration is picked up.
curl --silent --location \
  https://nvidia.github.io/libnvidia-container/rhel9.1/libnvidia-container.repo \
  | tee /etc/yum.repos.d/nvidia-container-toolkit.repo
dnf install --assumeyes nvidia-container-toolkit
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker
# Miniconda - installer list:
# https://docs.conda.io/en/latest/miniconda.html#linux-installers
# Download the latest x86_64 Linux installer into the current directory
# (following redirects), then run it with bash.
curl --location --remote-name \
  "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
bash Miniconda3-latest-Linux-x86_64.sh
# TensorFlow with GPU support, following https://www.tensorflow.org/install/pip
# Optional: work in a dedicated environment instead of the current one.
# conda create -n testenv -c conda-forge python=3.10
# conda activate testenv
conda install -c conda-forge cudatoolkit=11.8.0
# Alternative nvcc source, left disabled; the pip wheel below is used instead.
# conda install -c nvidia cuda-nvcc=11.3.58
# The version spec is quoted so the shell never treats "2.12.*" as a glob.
pip install nvidia-cuda-nvcc-cu11 nvidia-cudnn-cu11==8.6.0.163 'tensorflow==2.12.*'
# Copy libdevice into lib/nvvm/libdevice under the environment prefix.
# NOTE(review): presumably the layout XLA expects below the directory given
# by --xla_gpu_cuda_data_dir - confirm.
# Expansions are quoted so a prefix containing whitespace is not word-split.
mkdir -p "$CONDA_PREFIX/lib/nvvm/libdevice"
cp "$CONDA_PREFIX/lib/libdevice.10.bc" "$CONDA_PREFIX/lib/nvvm/libdevice/"
#######################################
# Append the GPU-related settings to the conda activation hook
# <prefix>/etc/conda/activate.d/env_vars.sh.
#
# The hook lines are written verbatim (quoted heredoc): their $VARS and
# $(...) are evaluated later, when the hook file is sourced, not now.
# The XLA_FLAGS line deliberately has no trailing "\n": bash's echo does not
# interpret it, so it used to be written literally and turned the data dir
# into ".../lib/n" once the hook was sourced.
#
# Globals:   CONDA_PREFIX (read, used when no argument is given)
# Arguments: $1 - conda environment prefix (optional)
# Outputs:   appends five lines to the hook file; errors go to stderr
# Returns:   0 on success, 1 if no prefix is known, otherwise the status
#            of the failing mkdir/cat
#######################################
write_conda_gpu_env_hook() {
  local prefix=${1:-${CONDA_PREFIX:-}}
  if [[ -z "$prefix" ]]; then
    # An empty prefix would make the paths below resolve under /etc/conda.
    echo 'CONDA_PREFIX is not set; activate a conda environment first' >&2
    return 1
  fi

  local hook_dir="$prefix/etc/conda/activate.d"
  mkdir -p "$hook_dir" || return

  cat >> "$hook_dir/env_vars.sh" <<'EOF' || return
NVCC_PATH=$(dirname $(python -c "import nvidia.cuda_nvcc;print(nvidia.cuda_nvcc.__file__)"))
export PATH=$PATH:$NVCC_PATH/bin
CUDNN_PATH=$(dirname $(python -c "import nvidia.cudnn;print(nvidia.cudnn.__file__)"))
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib:$CUDNN_PATH/lib
export XLA_FLAGS=--xla_gpu_cuda_data_dir=$CONDA_PREFIX/lib/
EOF
}
write_conda_gpu_env_hook
# Verify install:
# Load the activation hook into the current shell (it is otherwise only read
# on `conda activate`), then ask TensorFlow which GPU devices it can see.
# The path is quoted so a prefix containing whitespace is not word-split.
source "$CONDA_PREFIX/etc/conda/activate.d/env_vars.sh"
python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
# PyTorch, installed from the cu118 wheel index; see
# https://pytorch.org/get-started/locally/
pip install \
  --index-url https://download.pytorch.org/whl/cu118 \
  torch torchvision torchaudio
# Example code to try on the finished setup:
# https://github.com/tensorflow/examples
# https://github.com/pytorch/examples
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment