Created
July 21, 2023 23:01
-
-
Save Neeratyoy/f1bc8566f438f3ec210e8d0327c9dd54 to your computer and use it in GitHub Desktop.
The goal of this script is to serve as an example of ensuring that all sub-tasks within a job are allocated different GPUs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
################################################################
# The goal of this script:
#   Run a job where 4 workers are triggered AT THE SAME TIME
#   and each worker performs a model training using one GPU.
#   Run an array job of such jobs.
#
# NOTE: the solutions to SLURM issues are dependent on the
#   setup and vary from cluster to cluster.
################################################################
#SBATCH --time 2-00:00
#SBATCH --job-name max24hrs
#SBATCH --partition ...
#SBATCH --array 0-11%4
#SBATCH --error ...
#SBATCH --output ...
#SBATCH --gres=gpu:4
#SBATCH -c 8
#SBATCH --mem-per-cpu 12000

# Fan out 4 concurrent job steps. Each srun step requests 1 of the job's
# 4 GPUs and 2 of its 8 CPUs; --exclusive asks SLURM to give each step
# resources not shared with the other concurrently running steps, which
# is what ensures every sub-task lands on a different GPU.
# NOTE(review): on newer SLURM versions --exact (and a per-step --mem)
# may also be needed for steps to run truly in parallel — confirm on
# your cluster.
for i in {1..4}; do
  # FIX: the original ran `python -m script_to_run_.py`, but `-m` expects
  # a module name WITHOUT the .py extension; invoke the script directly.
  # The trailing & backgrounds the step so all 4 launch at the same time.
  srun --ntasks 1 --cpus-per-task 2 --gres=gpu:1 --exclusive \
    python script_to_run.py --experiment_args "$SLURM_ARRAY_TASK_ID" &
done

# Block until every background step finishes; without this the batch
# script exits immediately and SLURM kills the still-running steps.
wait
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment