Skip to content

Instantly share code, notes, and snippets.

@Quentin-Anthony
Created December 3, 2024 20:00
Show Gist options
  • Save Quentin-Anthony/0388fb41487af753a6418d51e601a9fb to your computer and use it in GitHub Desktop.
Save Quentin-Anthony/0388fb41487af753a6418d51e601a9fb to your computer and use it in GitHub Desktop.
Builds PyTorch from source with MPI as the distributed backend
#!/bin/bash
#SBATCH -t 2:00:00
#SBATCH -N 1
#SBATCH -p a100
#SBATCH --gpus-per-node=2
#
# Slurm batch job that builds PyTorch from source with MPI as the
# distributed backend.
#
# Requested resources (see the #SBATCH directives above): a 2 hour time
# limit, one node in the "a100" partition, two GPUs on that node.
#
# Everything below is site/user specific configuration; edit these values
# rather than the commands further down.

# Toolchain versions; they select the environment modules that get loaded.
GCC_VERSION="10.3.0"
CUDA_VERSION="11.6"

# In the visible script TORCH_VERSION and MV2_VERSION are only used to label
# the log file.
# NOTE(review): TORCH_VERSION says 1.13.1 while BRANCH below checks out
# v1.12.1-cudaMPI -- confirm which one is intended.
TORCH_VERSION="1.13.1"
MV2_VERSION="release-plus-3.0a2"

# Log file that receives all output of the checkout/build step.
OUTFILE="/home/xu.3304/logs/torch_install/torch${TORCH_VERSION}_${MV2_VERSION}_cuda${CUDA_VERSION}_gcc${GCC_VERSION}.log"

# MVAPICH2 installation whose setup.sh is sourced before the build.
MV2_BUILD="/home/xu.3304/mvapich2-install/nvidia/release-plus-3.0a2/8201a3750b_cuda11.6"

# Conda environment that is activated and whose python runs the build.
CONDA_ENV="release-plus-3.0a2-torch-hvd-nvidia"

# Git branch of the PyTorch fork to build.
BRANCH="v1.12.1-cudaMPI"
# ---- Toolchain and library environment -------------------------------------
# Start from a clean module environment, then load the compiler, CMake and
# CUDA modules selected by the version variables.
module purge
module load "gcc/$GCC_VERSION"
module load cmake
module load "cuda/$CUDA_VERSION"

export CUDA_HOME="/opt/cuda/$CUDA_VERSION"
# Prepend the CUDA headers to CPATH. The ':' separator is only added when
# CPATH already has a value: an empty CPATH element makes GCC search the
# current working directory for headers.
export CPATH="$CUDA_HOME/include${CPATH:+:$CPATH}"

# cuDNN archive location; exported for the build and also the directory
# whose setup.sh is sourced below.
CUDNN_ROOT="/home/xu.3304/cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
export CUDNN_LIB_DIR="$CUDNN_ROOT/lib"
export CUDNN_INCLUDE_DIR="$CUDNN_ROOT/include"

# Each setup.sh is sourced from inside its own directory. "./" makes bash
# read the file in that directory instead of searching PATH first, and the
# cd is checked so a missing directory cannot source the wrong file.
cd "$CUDNN_ROOT" || { echo "cannot cd to $CUDNN_ROOT" >&2; exit 1; }
source ./setup.sh
# cd /home/xu.3304/zfp_compression
# source setup.sh
cd "$MV2_BUILD" || { echo "cannot cd to $MV2_BUILD" >&2; exit 1; }
source ./setup.sh
cd || exit 1

# Make the `conda` shell function available, then activate the build env.
source /home/xu.3304/miniconda3/etc/profile.d/conda.sh
conda activate "$CONDA_ENV" || { echo "cannot activate conda env $CONDA_ENV" >&2; exit 1; }
# ---- Fetch the sources and build --------------------------------------------
# Work in a scratch directory under $HOME; create it (and the log directory)
# if missing so neither the cd nor the output redirection below can fail for
# that reason.
fork_dir="$HOME/pytorch-fork"
mkdir -p -- "$fork_dir" "$(dirname -- "$OUTFILE")" || exit 1

# Remove any previous checkout. ${fork_dir:?} aborts instead of expanding to
# "/*" should the variable ever be empty.
rm -rf -- "${fork_dir:?}"/*
cd "$fork_dir" || { echo "cannot cd to $fork_dir" >&2; exit 1; }
git clone git@github.com:R0n12/pytorch.git || { echo "git clone failed" >&2; exit 1; }
cd pytorch || { echo "cannot cd to $fork_dir/pytorch" >&2; exit 1; }

# MAX_JOBS=4 BUILD_TEST=0 BUILD_MOBILE_BENCHMARK=0 BUILD_MOBILE_TEST=0 USE_GLOO=0 /home/xu.3304/miniconda3/envs/$CONDA_ENV/bin/python setup.py install > $OUTFILE 2>&1

# Run checkout, diagnostics and the build in a subshell so all of its output
# (stdout and stderr) is captured in $OUTFILE. Steps the build depends on
# abort the subshell on failure; cleanup and diagnostic commands are
# best-effort and may fail without stopping the job.
(
  git clean -fdx
  git checkout "$BRANCH" || { echo "git checkout $BRANCH failed" >&2; exit 1; }
  git submodule sync || { echo "git submodule sync failed" >&2; exit 1; }
  git submodule update --init --recursive \
    || { echo "git submodule update failed" >&2; exit 1; }

  # Best-effort cleanup of earlier build products.
  make clean
  python setup.py clean

  # Point CMake at the active conda env; fall back to the directory above
  # the conda executable when CONDA_PREFIX is not set.
  export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-$(dirname "$(which conda)")/../}"

  # Record which MPI wrappers, conda, python and modules are in use.
  which mpicc
  which mpicxx
  mpiname -a
  which conda
  which python
  module list

  MAX_JOBS=4 BUILD_TEST=0 BUILD_MOBILE_BENCHMARK=0 BUILD_MOBILE_TEST=0 \
    "/home/xu.3304/miniconda3/envs/$CONDA_ENV/bin/python" setup.py develop
) > "$OUTFILE" 2>&1
# MAX_JOBS=4 BUILD_TEST=0 BUILD_MOBILE_BENCHMARK=0 BUILD_MOBILE_TEST=0 python setup.py develop > log 2>&1
# USE_CUDA_MPI=1
# BUILD_TEST=0 BUILD_MOBILE_BENCHMARK=0 BUILD_MOBILE_TEST=0 USE_GLOO=0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment