@bebosudo
Created April 10, 2019 11:09
#!/usr/bin/env bash
#SBATCH -p be-short
#SBATCH -J bescratch_test
#SBATCH -t 24:00:00
#SBATCH --hint=nomultithread
#SBATCH -d singleton
#SBATCH --ntasks-per-node=20
#SBATCH -n 640
# PBS at C3HPC submit info:
##PBS -q gpu
##PBS -l walltime=24:00:00
##PBS -l nodes=8:ppn=20
set -e
[ -z "${MODULE+x}" ] && MODULE="ior/3.2.0/intel/18.0.3-ep2suod"
module load "$MODULE"
[ -z ${API_SET+x} ] && API_SET="MPIIO HDF5 NCMPI"
# Site switch: CERN uses Slurm; otherwise assume PBS (C3HPC).
CERN=true
# C3HPC=true
if [[ $CERN == "true" ]]; then
    ORIG_DIR="$PWD"
    [ -z "${MPIRUN_BIN+x}" ] && MPIRUN_BIN="srun"
    JOB_ID="$SLURM_JOBID"
    NUM_NODES="$SLURM_NNODES"; NUM_PROCS_AVAIL="$SLURM_NTASKS"; NPROCS=$NUM_PROCS_AVAIL
else
    [ -n "$PBS_O_WORKDIR" ] && ORIG_DIR="$PBS_O_WORKDIR" || ORIG_DIR="$PWD"
    [ -z "${MPIRUN_BIN+x}" ] && MPIRUN_BIN="$(which mpirun)"
    JOB_ID="$PBS_JOBID"
    NUM_NODES="$($MPIRUN_BIN hostname | sort -u | wc -l)"
    NUM_PROCS_AVAIL="$($MPIRUN_BIN hostname | wc -l)"; NPROCS=$NUM_PROCS_AVAIL
fi
cd "$ORIG_DIR"; echo "ORIG_DIR=$ORIG_DIR JOB_ID=$JOB_ID"
PROC_PER_NODE=20
# We want to simulate the behaviour of a "classic" HPC application, which
# checkpoints data from all processes to a single file: no repetitions, so segment=1
NUM_SEG=1
IOR_BIN="$(which ior)"
[ -z ${FILE_SIZE_MB+x} ] && FILE_SIZE_MB="20480"
[[ $ENABLE_COLLECTIVE != "false" && $ENABLE_COLLECTIVE != "no" ]] && IOR_FLAGS="${IOR_FLAGS} -c"
#[ -z ${NODES_TO_RUN+x} ] && NODES_TO_RUN="1 2 4 8 16 32"
[ ! -z ${OUTPUT_FILE+x} ] && IOR_FLAGS="${IOR_FLAGS} -f ${OUTPUT_FILE}"
# -C reorders tasks so each rank reads back data written by a different rank, defeating the client read cache
# -e runs an fsync after each write phase
# -B uses O_DIRECT when writing, which should avoid write-cache effects
IOR_FLAGS="${IOR_FLAGS} -C -e -B"
echo "ENABLE_COLLECTIVE='${ENABLE_COLLECTIVE}', IOR_FLAGS='${IOR_FLAGS}', MPIRUN_BIN='${MPIRUN_BIN}', FILE_SIZE_MB='${FILE_SIZE_MB}', OUTPUT_FILE='${OUTPUT_FILE}'"
#echo "sudo drop_caches enabled"
for FSIZE in ${FILE_SIZE_MB}; do
    for api in $API_SET; do
        #for N in $NODES_TO_RUN; do
        #NP=$(( $N * $PROC_PER_NODE ))
        NP=$NUM_PROCS_AVAIL
        BS=$(( FSIZE / NP ))
        #[ $NP -gt $NUM_PROCS_AVAIL ] && echo "There are fewer processors than the 32 nodes/640 procs expected" && continue
        # We created config files in hints/ containing lines like `IOR_HINT__MPI__romio_cb_write=enable`,
        # as reported in "Parallel IO performance and scalability study on the PRACE CURIE supercomputer",
        # http://www.prace-ri.eu/IMG/pdf/Parallel_IO_performance_and_scalability_study_on_the_PRACE_CURIE_supercomputer-2.pdf
        for hint_file in hints/*.conf; do
            hint_num="$(echo "$hint_file" | egrep -o 'hint_[[:digit:]]*.conf' | egrep -o '[[:digit:]]*')"
            echo "Nodes=$NUM_NODES, NP=$NP, BS=$BS, hint_file='${hint_file}'"
            [[ $MPIRUN_BIN == *mpirun ]] && MPIRUN_OPTS="-n $NP"
            [[ $MPIRUN_BIN == *srun ]] && MPIRUN_OPTS="-N $NUM_NODES --ntasks-per-node=$PROC_PER_NODE -n $NP"
            [[ $ENABLE_INTEL_STATS != "false" && $ENABLE_INTEL_STATS != "no" ]] && export I_MPI_STATS="all" I_MPI_STATS_FILE="$ORIG_DIR/stats-$(echo $JOB_ID | cut -d'.' -f1)_n${NPROCS}_a${api}_h${hint_num}_${FSIZE}" || unset I_MPI_STATS I_MPI_STATS_FILE
            # For transfer size and block size, k and K are equivalent: 1k = 1024.
            #sudo /usr/sbin/drop_caches  # this should make results reproducible
            [ ! -z ${OUTPUT_FILE+x} ] && echo > "$OUTPUT_FILE"  # Empty the output file at each execution, if defined
            $MPIRUN_BIN $MPIRUN_OPTS "$IOR_BIN" -a $api $IOR_FLAGS -t 2m -b ${BS}m -s 1 -i 2 -U "$hint_file"
        done
        #done
    done
done
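With the defaults above (FILE_SIZE_MB=20480 and 32 nodes x 20 tasks = 640 ranks), each rank writes a 32 MiB block. A minimal sketch of the block-size arithmetic, with the resulting srun command shown as an illustrative comment (the exact flags come from the loop above; node count and hint file are example values):

```shell
# Per-rank block size: aggregate file size (MiB) divided by total MPI tasks.
FSIZE=20480   # MiB, total checkpoint file size
NP=640        # total MPI tasks (32 nodes x 20 tasks/node)
BS=$(( FSIZE / NP ))
echo "BS=${BS}m"   # prints BS=32m

# The inner loop then expands to a command like (illustrative only):
# srun -N 32 --ntasks-per-node=20 -n 640 ior -a MPIIO -c -C -e -B \
#      -t 2m -b 32m -s 1 -i 2 -U hints/hint_1.conf
```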
bebosudo commented Apr 10, 2019

Hint files hints/hint_X.conf (X in {1..9}) were created according to the paper
"Parallel IO performance and scalability study on the PRACE CURIE supercomputer":
http://www.prace-ri.eu/IMG/pdf/Parallel_IO_performance_and_scalability_study_on_the_PRACE_CURIE_supercomputer-2.pdf

E.g. hint_1.conf:

IOR_HINT__MPI__romio_cb_write=enable
IOR_HINT__MPI__romio_ds_write=enable
IOR_HINT__MPI__romio_cb_read=enable
IOR_HINT__MPI__romio_ds_read=enable

Or use the following to generate them:

for i in {1..3}; do echo "IOR_HINT__MPI__romio_cb_write=enable"    >> hint_${i}.conf; done
for i in {4..6}; do echo "IOR_HINT__MPI__romio_cb_write=disable"   >> hint_${i}.conf; done
for i in {7..9}; do echo "IOR_HINT__MPI__romio_cb_write=automatic" >> hint_${i}.conf; done

for i in {0..2}; do echo "IOR_HINT__MPI__romio_ds_write=enable"    >> hint_$(( i * 3 + 1 )).conf; done
for i in {0..2}; do echo "IOR_HINT__MPI__romio_ds_write=disable"   >> hint_$(( i * 3 + 2 )).conf; done
for i in {0..2}; do echo "IOR_HINT__MPI__romio_ds_write=automatic" >> hint_$(( i * 3 + 3 )).conf; done

for i in {1..3}; do echo "IOR_HINT__MPI__romio_cb_read=enable"     >> hint_${i}.conf; done
for i in {4..6}; do echo "IOR_HINT__MPI__romio_cb_read=disable"    >> hint_${i}.conf; done
for i in {7..9}; do echo "IOR_HINT__MPI__romio_cb_read=automatic"  >> hint_${i}.conf; done

for i in {0..2}; do echo "IOR_HINT__MPI__romio_ds_read=enable"     >> hint_$(( i * 3 + 1 )).conf; done
for i in {0..2}; do echo "IOR_HINT__MPI__romio_ds_read=disable"    >> hint_$(( i * 3 + 2 )).conf; done
for i in {0..2}; do echo "IOR_HINT__MPI__romio_ds_read=automatic"  >> hint_$(( i * 3 + 3 )).conf; done
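The two interleaved patterns above (cb in blocks of three, ds cycling within each block) produce the full 3x3 product of collective-buffering and data-sieving settings. A minimal sketch that generates just the write hints in a scratch directory and spot-checks hint_5.conf, which should land on the middle combination (both disabled):

```shell
# Generate the write-hint portion of the nine files in a throwaway
# directory, then inspect hint_5.conf: cb_write=disable, ds_write=disable.
tmp=$(mktemp -d)
cd "$tmp"
for i in {4..6}; do echo "IOR_HINT__MPI__romio_cb_write=disable"   >> hint_${i}.conf; done
for i in {0..2}; do echo "IOR_HINT__MPI__romio_ds_write=disable"   >> hint_$(( i * 3 + 2 )).conf; done
cat hint_5.conf
# prints:
# IOR_HINT__MPI__romio_cb_write=disable
# IOR_HINT__MPI__romio_ds_write=disable
```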
