Last active
July 7, 2023 07:59
-
-
Save 0x0L/ba7fce19598293dcc31df74fab4c753e to your computer and use it in GitHub Desktop.
Setup P3 EC2 Rocky 9
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the NVIDIA driver on a Rocky Linux 9 EC2 GPU instance.
# These commands need root privileges (no sudo is used).
# Image source: https://rockylinux.org/cloud-images/
# AMI 0951577d22b8e8dd4 (Rocky 9.1)
# Instance p3.2xlarge

# epel-release ships the `crb` helper used to enable the CRB repository.
dnf install -y epel-release
crb enable

# Add NVIDIA's CUDA repository for RHEL 9 and install the latest driver stream.
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
dnf module install -y nvidia-driver:latest

# Reboot (presumably so the freshly installed driver is loaded); continue with
# the remaining steps once the instance is back up.
reboot
# Run at each startup: write 0 into the numa_node attribute of every PCI device.
# NOTE(review): presumably this silences TensorFlow's "NUMA node read from
# SysFS had negative value (-1)" warning — confirm.
for pci_dev in /sys/bus/pci/devices/*; do
  # Skip the unexpanded glob (no devices) and entries without the attribute.
  [ -e "$pci_dev/numa_node" ] || continue
  echo 0 > "$pci_dev/numa_node"
done
# Docker stuff: install Docker CE, then configure the NVIDIA container runtime.
dnf config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo
dnf install -y docker-ce
systemctl --now enable docker

# Add the `rocky` user to the docker and wheel groups.
usermod -aG docker rocky
usermod -aG wheel rocky

# Fetch the NVIDIA container toolkit repo definition.
# -f makes curl fail on an HTTP error instead of saving the error page as a
# .repo file; -S still prints the error despite -s; -o writes the file directly.
curl -fsSL https://nvidia.github.io/libnvidia-container/rhel9.1/libnvidia-container.repo \
  -o /etc/yum.repos.d/nvidia-container-toolkit.repo
dnf install -y nvidia-container-toolkit

# Configure Docker to use the NVIDIA runtime, then restart Docker to apply it.
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download and run the Miniconda installer.
# https://docs.conda.io/en/latest/miniconda.html#linux-installers
# -f makes curl fail on an HTTP error rather than saving an error page as the
# installer; `&&` keeps the installer from running after a failed download.
curl -fL -O "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \
  && bash Miniconda3-latest-Linux-x86_64.sh

# Optional: create and activate a dedicated environment before continuing.
# conda create -n testenv -c conda-forge python=3.10
# conda activate testenv
# TensorFlow 2.12 GPU setup inside the currently active conda environment.
# https://www.tensorflow.org/install/pip

# Stop early when no conda environment is active: with CONDA_PREFIX empty the
# paths below would resolve to /lib and /etc on the root filesystem.
: "${CONDA_PREFIX:?activate a conda environment first}"

conda install -c conda-forge cudatoolkit=11.8.0
# conda install -c nvidia cuda-nvcc=11.3.58
# The version spec is quoted so the shell never treats `*` as a glob.
pip install nvidia-cuda-nvcc-cu11 nvidia-cudnn-cu11==8.6.0.163 'tensorflow==2.12.*'

# Copy libdevice into lib/nvvm/libdevice, beneath the directory that XLA_FLAGS
# (written below) names as the CUDA data dir.
mkdir -p "$CONDA_PREFIX/lib/nvvm/libdevice"
cp "$CONDA_PREFIX/lib/libdevice.10.bc" "$CONDA_PREFIX/lib/nvvm/libdevice/"

# Append the environment setup to the env's activation script.
# The quoted 'EOF' delimiter keeps every $... literal, so expansion happens when
# env_vars.sh is sourced, not now.
mkdir -p "$CONDA_PREFIX/etc/conda/activate.d"
cat >> "$CONDA_PREFIX/etc/conda/activate.d/env_vars.sh" <<'EOF'
NVCC_PATH=$(dirname "$(python -c "import nvidia.cuda_nvcc;print(nvidia.cuda_nvcc.__file__)")")
export PATH=$PATH:$NVCC_PATH/bin
CUDNN_PATH=$(dirname "$(python -c "import nvidia.cudnn;print(nvidia.cudnn.__file__)")")
# ${VAR:+...} avoids a leading ':' when LD_LIBRARY_PATH is empty; an empty
# entry would make the loader search the current directory.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$CONDA_PREFIX/lib:$CUDNN_PATH/lib
# No trailing "\n" here: written via echo it ended up literally in the file and
# turned the path into ".../lib/n" when sourced.
export XLA_FLAGS=--xla_gpu_cuda_data_dir=$CONDA_PREFIX/lib/
EOF
# Verify install: load the activation variables into the current shell, then
# print the GPU devices TensorFlow reports.
source "$CONDA_PREFIX/etc/conda/activate.d/env_vars.sh"
python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
# Install PyTorch, torchvision and torchaudio from the cu118 wheel index.
# https://pytorch.org/get-started/locally/
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Example projects:
# https://github.com/tensorflow/examples
# https://github.com/pytorch/examples
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment