Job script:
#!/bin/bash
#SBATCH --job-name=rfee
#SBATCH --workdir=/home/user.name/rfee/
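# note: on newer Slurm versions, --workdir has been replaced by --chdir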
#SBATCH --output=r_foreach_example_console_output.txt
#SBATCH --mem-per-cpu=100 # specify RAM per CPU here in MB
#SBATCH --time=0:02:00
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4 # specify number of CPUs to use here
module load r-3.5.1-gcc-4.8.5-esnjoca
R --file=./r_foreach_example.R
R script (r_foreach_example.R):
# specify the path from which to load R packages
.libPaths('/home/user.name/R')
# load the packages:
library('doMC') # note: loading 'doMC' also loads 'foreach'
# a simple function to execute first in serial then again in parallel
f1 <- function(x) {
  Sys.sleep(2)
  return(x)
}
# import the number of available CPUs
# this is the number we supplied on the '#SBATCH --cpus-per-task=4'
# line in our job script
n.cpus <- Sys.getenv("SLURM_CPUS_PER_TASK")
n.cpus
class(n.cpus)
# we need this to be numeric below so:
n.cpus <- as.numeric(n.cpus)
n.cpus
class(n.cpus)
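# optional fallback: when this script is run outside Slurm,
# SLURM_CPUS_PER_TASK is unset, Sys.getenv() returns "", and
# as.numeric("") is NA, so default to a single CPU in that case
if (is.na(n.cpus)) n.cpus <- 1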
# register a parallel backend, using the CPU count we imported via Sys.getenv() above
registerDoMC(cores = n.cpus)
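# note: doMC parallelises via process forking, so it works on Linux/macOS but not on Windows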
# run a serial foreach loop
system.time(
  s1 <- foreach(i = 1:4, .combine = c) %do%
    f1(i)
)
# run a parallel foreach loop
system.time(
  s2 <- foreach(i = 1:4, .combine = c) %dopar%
    f1(i)
)
# the parallel foreach loop should be faster: the serial loop takes about
# 8 seconds (four 2-second sleeps run one after another), while the parallel
# loop takes about 2 seconds (all four sleeps run at once across the 4 CPUs)
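To run the example, submit the job script with sbatch and, once the job finishes, inspect the file named in the #SBATCH --output line. A minimal sketch, assuming the job script above is saved as r_foreach_example.sh (that file name is an assumption):

sbatch r_foreach_example.sh                # submit the job
squeue -u $USER                            # check the job's status in the queue
cat r_foreach_example_console_output.txt   # view the console output once the job is done

The output file is written relative to the directory given in the job script's working-directory line.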