Created
December 3, 2024 20:00
-
-
Save Quentin-Anthony/0388fb41487af753a6418d51e601a9fb to your computer and use it in GitHub Desktop.
Builds PyTorch from source with MPI as the distributed backend
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Builds PyTorch from source with MPI (an MVAPICH2 install) as the distributed
# backend. Written as a Slurm batch script: submit it with sbatch.
#
# Slurm only honours #SBATCH directives that appear before the first
# executable command, so they must stay at the top of the script.
# Wall-clock limit for the job.
#SBATCH -t 2:00:00
# Number of nodes.
#SBATCH -N 1
# Partition to run in.
#SBATCH -p a100
#SBATCH --gpus-per-node=2

# ---------------------------------------------------------------------------
# Build configuration
# ---------------------------------------------------------------------------
# Compiler and CUDA versions; used to pick the gcc/ and cuda/ modules and to
# name the log file.
GCC_VERSION="10.3.0"
CUDA_VERSION="11.6"
# TORCH_VERSION and MV2_VERSION are only used to name the log file.
# NOTE(review): TORCH_VERSION says 1.13.1 while BRANCH below checks out
# v1.12.1-cudaMPI -- confirm which one is intended.
TORCH_VERSION="1.13.1"
MV2_VERSION="release-plus-3.0a2"
# Combined stdout/stderr of the build is written here.
OUTFILE="/home/xu.3304/logs/torch_install/torch${TORCH_VERSION}_${MV2_VERSION}_cuda${CUDA_VERSION}_gcc${GCC_VERSION}.log"
# MVAPICH2 install prefix; its setup.sh is sourced before building.
MV2_BUILD="/home/xu.3304/mvapich2-install/nvidia/release-plus-3.0a2/8201a3750b_cuda11.6"
# Conda environment that receives the PyTorch development install.
CONDA_ENV="release-plus-3.0a2-torch-hvd-nvidia"
# Branch of the PyTorch fork to build.
BRANCH="v1.12.1-cudaMPI"
# ---------------------------------------------------------------------------
# Toolchain environment: modules, CUDA, cuDNN, MVAPICH2, conda
# ---------------------------------------------------------------------------
# Start from a clean module environment, then load the build toolchain.
module purge
module load "gcc/$GCC_VERSION"
module load cmake
module load "cuda/$CUDA_VERSION"

export CUDA_HOME="/opt/cuda/$CUDA_VERSION"
# Append the previous CPATH only when it is non-empty. A trailing ':' would
# leave an empty element, which GCC interprets as the current directory.
export CPATH="$CUDA_HOME/include${CPATH:+:$CPATH}"

# Local cuDNN archive; the lib and include paths are exported for the build.
CUDNN_ROOT="/home/xu.3304/cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
export CUDNN_LIB_DIR="$CUDNN_ROOT/lib"
export CUDNN_INCLUDE_DIR="$CUDNN_ROOT/include"

# Each setup.sh is sourced from inside its own directory (presumably the
# scripts rely on the current directory -- kept as in the original flow).
# Abort if a cd fails so a setup.sh from some other directory is never
# sourced, and use ./ so bash does not look the name up on PATH first.
cd "$CUDNN_ROOT" || { echo "cannot cd to $CUDNN_ROOT" >&2; exit 1; }
source ./setup.sh
# cd /home/xu.3304/zfp_compression
# source setup.sh
cd "$MV2_BUILD" || { echo "cannot cd to $MV2_BUILD" >&2; exit 1; }
source ./setup.sh
cd || exit 1

# Make the `conda` shell function available, then enter the target env.
source /home/xu.3304/miniconda3/etc/profile.d/conda.sh
conda activate "$CONDA_ENV" || { echo "cannot activate conda env $CONDA_ENV" >&2; exit 1; }
# ---------------------------------------------------------------------------
# Fresh checkout: empty ~/pytorch-fork and clone the PyTorch fork into it
# ---------------------------------------------------------------------------
# Every directory change is checked: if one of them failed silently, the clone
# and the later build/clean commands would run in the wrong directory.
cd || exit 1
mkdir -p pytorch-fork || exit 1
rm -rf -- pytorch-fork/*
cd pytorch-fork || { echo "cannot cd to $HOME/pytorch-fork" >&2; exit 1; }
# NOTE(review): the remote was garbled in the source listing; it is restored
# here as the SSH URL of the R0n12/pytorch fork on GitHub -- confirm.
git clone git@github.com:R0n12/pytorch.git || { echo "git clone failed" >&2; exit 1; }
cd pytorch || { echo "cannot cd to cloned pytorch directory" >&2; exit 1; }
# ---------------------------------------------------------------------------
# Build: check out $BRANCH, sync submodules, log the toolchain, then run
# `setup.py develop`. All output of this section goes to $OUTFILE.
# ---------------------------------------------------------------------------
# Earlier variant, kept for reference:
# MAX_JOBS=4 BUILD_TEST=0 BUILD_MOBILE_BENCHMARK=0 BUILD_MOBILE_TEST=0 USE_GLOO=0 /home/xu.3304/miniconda3/envs/$CONDA_ENV/bin/python setup.py install > $OUTFILE 2>&1

# The redirection below fails (and nothing is built) if the log directory
# does not exist, so create it first.
mkdir -p -- "$(dirname -- "$OUTFILE")" || exit 1

# Runs in a subshell so the CMAKE_PREFIX_PATH export stays local to the build.
# The subshell's exit status (the build's, or 1 on an early abort) becomes the
# script's exit status.
(
  # Clean-up steps are best-effort: they may fail on a fresh clone and must
  # not stop the build.
  git clean -fdx

  # Building the wrong revision or with missing submodules is worse than not
  # building at all, so these two steps are fatal.
  git checkout "$BRANCH" || { echo "git checkout $BRANCH failed" >&2; exit 1; }
  git submodule sync
  git submodule update --init --recursive \
    || { echo "git submodule update failed" >&2; exit 1; }

  make clean
  python setup.py clean

  # Point CMake at the active conda env; fall back to the directory above
  # the conda executable when CONDA_PREFIX is unset or empty.
  export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-$(dirname "$(which conda)")/../}"

  # Record which MPI, conda, python and modules this build used.
  which mpicc
  which mpicxx
  mpiname -a
  which conda
  which python
  module list

  MAX_JOBS=4 BUILD_TEST=0 BUILD_MOBILE_BENCHMARK=0 BUILD_MOBILE_TEST=0 \
    "/home/xu.3304/miniconda3/envs/$CONDA_ENV/bin/python" setup.py develop
) > "$OUTFILE" 2>&1
# Other variants and flags tried earlier, kept for reference:
# MAX_JOBS=4 BUILD_TEST=0 BUILD_MOBILE_BENCHMARK=0 BUILD_MOBILE_TEST=0 python setup.py develop > log 2>&1
# USE_CUDA_MPI=1
# BUILD_TEST=0 BUILD_MOBILE_BENCHMARK=0 BUILD_MOBILE_TEST=0 USE_GLOO=0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment