Last active
October 25, 2022 23:53
-
-
Save nousr/4759ad960719b9dbcf28831403fd1dff to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Per-node launcher for diffusion-prior training.
#
# Intended to be started by `srun` inside a Slurm allocation. It reads the
# SLURM_* variables of the current job step and passes them on to
# `accelerate launch`, which starts the training processes on this node.

# Job-step topology as exported by Slurm.
LOCAL_ID=$SLURM_LOCALID
GLOBAL_RANK=$SLURM_PROCID
NODE_ID=$SLURM_NODEID
NUM_PROCS=$SLURM_NTASKS
CPUS=$SLURM_CPUS_PER_GPU
NUM_NODES=$SLURM_NNODES

# GPUs driven on each node. Falls back to 8, the value this script used to
# hard-code, when Slurm does not export SLURM_GPUS_ON_NODE.
GPUS_PER_NODE=${SLURM_GPUS_ON_NODE:-8}

# Comma-separated device list "0,1,...,GPUS_PER_NODE-1". The previous literal
# list ran from 0 to 8, i.e. one id more than the 8 GPUs assumed elsewhere.
GPU_IDS=""
for (( gpu = 0; gpu < GPUS_PER_NODE; gpu++ )); do
  GPU_IDS+="${GPU_IDS:+,}${gpu}"
done

# The first host of the allocation acts as the rendezvous point.
MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
if [[ -z "$MASTER_ADDR" ]]; then
  printf 'error: could not determine the first host of SLURM_JOB_NODELIST\n' >&2
  exit 1
fi

# Print the launch summary once (global rank 0 only).
if [[ "$GLOBAL_RANK" == 0 ]]; then
  printf 'MASTER ADDR: %s\tLOCAL ID: %s\tGLOBAL RANK: %s\tNODE ID: %s\tNUM PROCS: %s\tCPUS PER TASK: %s\n' \
    "$MASTER_ADDR" "$LOCAL_ID" "$GLOBAL_RANK" "$NODE_ID" "$NUM_PROCS" "$CPUS"
fi

# Quoted on purpose: a bracketed node list (e.g. "node-[1-2]") is a valid glob
# pattern and must not be expanded against the working directory.
printf '%s\n' "$SLURM_JOB_NODELIST"

# start virtual environment
# shellcheck source=/dev/null
source /fsx/nousr/DALLE2-pytorch/.env/bin/activate || {
  printf 'error: could not activate the virtual environment\n' >&2
  exit 1
}

# empty torch cache before starting
# NOTE(review): this runs in its own short-lived python process; confirm it
# has any effect on the training processes launched below.
python3 -c "import torch; torch.cuda.empty_cache()"

# launch process
# The total process count is (Slurm tasks) x (GPUs per node).
accelerate launch \
  --multi_gpu \
  --num_processes="$(( NUM_PROCS * GPUS_PER_NODE ))" \
  --num_cpu_threads_per_process="$CPUS" \
  --num_machines="$NUM_NODES" \
  --machine_rank="$NODE_ID" \
  --gpu_ids="$GPU_IDS" \
  --mixed_precision="no" \
  --main_process_ip="$MASTER_ADDR" \
  --main_process_port=3068 \
  /fsx/nousr/DALLE2-pytorch/train_diffusion_prior.py \
  --config_file /fsx/nousr/DALLE2-pytorch/configs/h_14_prior.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Slurm batch script for the h-14 prior training job.
# Requests 2 exclusive nodes with 8 GPUs each and one task per node, then
# starts prior.sh through srun.
#SBATCH --partition=gpu
#SBATCH --job-name=h-14-prior
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-gpu=6
#SBATCH --gres=gpu:8
#SBATCH --output=%x_%j.out
#SBATCH --comment=dalle2
#SBATCH --exclusive

# With --ntasks-per-node=1 this starts one copy of prior.sh per node.
srun --comment=dalle2 prior.sh
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment