janosh · November 12, 2024 04:50 · mumu6651 · Jul 20, 2024
diff --git a/compile-vasp-m1.md b/compile-vasp-m1.md
diff --git a/makefile.include b/makefile.include
 # Default precompiler options
 # These -D defines are appended to the preprocessor command through
 # $(CPP_OPTIONS) in the CPP assignment below.
 CPP_OPTIONS = -DHOST=\"LinuxGNU\" \
              -DMPI -DMPI_BLOCK=8000 -Duse_collective \
              -DscaLAPACK \
              -DCACHE_SIZE=4000 \
              -Davoidalloc \
              -Dvasp6 \
              -Duse_bse_te \
              -Dtbdyn \
              -Dfock_dblbuf \
              -D_OPENMP \
              -Dqd_emulate

 # Preprocessing command: gcc-13 with -E (preprocess only), -C (keep comments)
 # and -w (suppress warnings). It reads $*$(FUFFIX) and redirects the result
 # to $*$(SUFFIX), where $* is make's automatic "stem" variable.
 # NOTE(review): FUFFIX and SUFFIX are not set in this file -- presumably the
 # makefile that includes this one defines them; confirm.
 CPP         = gcc-13 -E -C -w $*$(FUFFIX) >$*$(SUFFIX) $(CPP_OPTIONS)

 # FC and FCL both use the mpif90 wrapper with OpenMP enabled.
 # NOTE(review): FCL is presumably the link command -- confirm against the
 # makefile that consumes it.
 FC          = mpif90 -fopenmp
 FCL         = mpif90 -fopenmp

 # Source-form flags: free-form input without a line-length limit.
 FREE        = -ffree-form -ffree-line-length-none

 # -L hard-codes the Homebrew GCC 13.2.0 library directory; adjust the path
 # when a different GCC version is installed.
 FFLAGS      = -w -ffpe-summary=invalid,zero,overflow -L /opt/homebrew/Cellar/gcc/13.2.0/lib/gcc/13

 # Optimization levels: -O2 by default (OFLAG_IN follows OFLAG), -O0 for DEBUG.
 OFLAG       = -O2
 OFLAG_IN    = $(OFLAG)
 DEBUG       = -O0

 # FFT-related object lists. OBJECTS_O1 and OBJECTS_O2 use += and therefore
 # extend whatever value is already defined when this file is read.
 # NOTE(review): the _O1/_O2 suffixes presumably select the optimization level
 # used for those objects -- confirm.
 OBJECTS     = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o
 OBJECTS_O1 += fftw3d.o fftmpi.o fftmpiw.o
 OBJECTS_O2 += fft3dlib.o

 # For what used to be vasp.5.lib
 # The library build reuses the main preprocessor, Fortran compiler and
 # source-form flags, with its own (lower) optimization levels.
 CPP_LIB     = $(CPP)
 FC_LIB      = $(FC)
 CC_LIB      = gcc-13
 CFLAGS_LIB  = -O
 FFLAGS_LIB  = -O1
 FREE_LIB    = $(FREE)

 OBJECTS_LIB = linpack_double.o getshmem.o

 # For the parser library
 CXX_PARS = g++-13
 LIBS += parser
 # Plain "=" (re)starts the LLIBS link line here; the later LLIBS lines in this
 # file append to it with +=.
 LLIBS = -Lparser -lparser -lstdc++
 # Install prefix of the QD library; ?= keeps a value that was already set
 # (e.g. from the environment or the make command line).
 QD ?= /opt/homebrew
 LLIBS += -L$(QD)/lib -lqdmod -lqd
 INCS += -I$(QD)/include/qd

 ##
 ## Customize as of this point! Of course you may change the preceding
 ## part of this file as well if you like, but it should rarely be
 ## necessary ...
 ##

 # -march=native tunes the build for the machine it is compiled on. When
 # cross-compiling for another architecture, replace it with the relevant
 # target.
 FFLAGS     += -march=native

 # For gcc-10 and higher (comment out for older versions)
 FFLAGS     += -fallow-argument-mismatch

 # BLAS and LAPACK (mandatory)
 # NOTE(review): the default pins the openblas 0.3.26 Cellar directory, which
 # presumably stops existing once Homebrew upgrades openblas. OPENBLAS_ROOT is
 # only a ?= default, so override it if the path is missing.
 OPENBLAS_ROOT ?= /opt/homebrew/Cellar/openblas/0.3.26
 BLASPACK    = -L$(OPENBLAS_ROOT)/lib -lopenblas

 # scaLAPACK (mandatory)
 SCALAPACK_ROOT ?= /opt/homebrew
 SCALAPACK   = -L$(SCALAPACK_ROOT)/lib -lscalapack

 # Append scaLAPACK and then BLAS/LAPACK to the link line.
 LLIBS      += $(SCALAPACK) $(BLASPACK)

 # FFTW (mandatory)
 # Links libfftw3 together with libfftw3_omp (matching the -fopenmp build).
 FFTW_ROOT  ?= /opt/homebrew
 LLIBS      += -L$(FFTW_ROOT)/lib -lfftw3 -lfftw3_omp
 INCS       += -I$(FFTW_ROOT)/include

 # HDF5-support (optional but strongly recommended)
 #CPP_OPTIONS+= -DVASP_HDF5
 #HDF5_ROOT  ?= /path/to/your/hdf5/installation
 #LLIBS      += -L$(HDF5_ROOT)/lib -lhdf5_fortran
 #INCS       += -I$(HDF5_ROOT)/include

 # For the VASP-2-Wannier90 interface (optional)
 #CPP_OPTIONS    += -DVASP2WANNIER90
 #WANNIER90_ROOT ?= /path/to/your/wannier90/installation
 #LLIBS          += -L$(WANNIER90_ROOT)/lib -lwannier

 # For the fftlib library (experimental)
 #CPP_OPTIONS+= -Dsysv
 #FCL        += fftlib.o
 #CXX_FFTLIB  = g++-13 -fopenmp -std=c++11 -DFFTLIB_THREADSAFE
 #INCS_FFTLIB = -I./include -I$(FFTW_ROOT)/include
 #LIBS       += fftlib
 #LLIBS      += -ldl
diff --git a/vasp-perf-grid-search.py b/vasp-perf-grid-search.py
 """This script grid-searches OMP_NUM_THREADS, NCORE and number of MPI processes for
 minimal VASP runtime on a simple Si2 relaxation.

 It writes the results to CSV and copies a
 markdown table to the clipboard. Requires Python 3.10. To keep a log, invoke with

 python vasp-perf-grid-search.py 2>&1 | tee Si-relax.log

 To install OpenMPI's mpiexec on macOS, use Homebrew:
 brew install open-mpi
 """

 import os
 import warnings
 from itertools import product
 from time import perf_counter, sleep

 import pandas as pd
 from atomate2.vasp.jobs.core import RelaxMaker
 from atomate2.vasp.powerups import update_user_incar_settings
 from jobflow import run_locally
 from pandas.io.clipboard import clipboard_set
 from pymatgen.core import Structure

 # Silence every warning so pymatgen's output does not flood the benchmark log.
 warnings.filterwarnings("ignore")

 # VASP executable that each benchmark run launches through mpiexec.
 VASP_BIN = "/Users/janosh/dev/vasp/compiled/vasp_std_6.3.0_m1"
 # One (n_proc, n_threads, n_core, elapsed) row is collected per finished run.
 results: list[tuple[int, int, int, float]] = []

 # Two-atom FCC silicon cell used as the benchmark system.
 si_structure = Structure(
    coords=[[0, 0, 0], [0.25, 0.25, 0.25]],
    species=["Si"] * 2,
    lattice=[[0, 2.73, 2.73], [2.73, 0, 2.73], [2.73, 2.73, 0]],
 )

 # Grid-search the number of MPI processes, OMP_NUM_THREADS and NCORE.
 #
 # The results are exported in the ``finally`` clause so that the timings
 # collected so far are kept not only on Ctrl+C but also when a run fails:
 # ``run_locally(..., ensure_success=True)`` raises on an unsuccessful job, and
 # without ``finally`` that exception would discard every completed measurement.
 # Any such exception still propagates after the export.
 try:
    prod = list(product([1, 2, 4, 8], [1, 2], [2, 4]))
    for idx, (n_proc, n_threads, n_core) in enumerate(prod, 1):
        # Set before the job starts; the launched VASP processes presumably
        # inherit it from this process's environment.
        os.environ["OMP_NUM_THREADS"] = str(n_threads)

        print(f"Run {idx} / {len(prod)}")

        # make a relax job to optimize the structure
        relax_job = RelaxMaker(
            run_vasp_kwargs={"vasp_cmd": f"mpiexec -np {n_proc} {VASP_BIN}"},
        ).make(si_structure)

        relax_job = update_user_incar_settings(relax_job, {"NCORE": n_core})

        # time only the execution of the job itself
        start = perf_counter()
        run_locally(relax_job, create_folders=True, ensure_success=True)
        elapsed = perf_counter() - start

        print(
            f"run with {n_proc=}, {n_threads=}, {n_core=} took {elapsed:.1f} sec",
        )
        results.append((n_proc, n_threads, n_core, elapsed))

        print("Waiting 10 secs to cooldown...\n\n", flush=True)
        sleep(10)  # so every run is a bit more like the first

 except KeyboardInterrupt:  # exit gracefully on ctrl+c and write partial results
    print("Job was interrupted")

 finally:
    # Export whatever was measured: rounded values to CSV, markdown to clipboard.
    df_perf = pd.DataFrame(
        results, columns=["n_proc", "n_threads", "n_core", "elapsed"]
    )
    df_perf.round(2).to_csv("vasp-perf-results.csv")
    clipboard_set(df_perf.to_markdown())
	n_proc	n_threads	n_core	elapsed (sec)
0	1	1	2	93.3
1	1	1	4	92.8
2	1	2	2	82.8
3	1	2	4	82.7
4	2	1	2	42.8
5	2	1	4	42.9
6	2	2	2	52.9
7	2	2	4	52.7
8	4	1	2	32.9
9	4	1	4	32.9
10	4	2	2	52.9
11	4	2	4	53.0
12	8	1	2	32.8
13	8	1	4	22.8
14	8	2	2	62.8
15	8	2	4	62.9
	# Default precompiler options
	# These -D defines are appended to the preprocessor command through
	# $(CPP_OPTIONS) in the CPP assignment below.
	CPP_OPTIONS = -DHOST=\"LinuxGNU\" \
	-DMPI -DMPI_BLOCK=8000 -Duse_collective \
	-DscaLAPACK \
	-DCACHE_SIZE=4000 \
	-Davoidalloc \
	-Dvasp6 \
	-Duse_bse_te \
	-Dtbdyn \
	-Dfock_dblbuf \
	-D_OPENMP \
	-Dqd_emulate

	# Preprocessing command: gcc-13 with -E (preprocess only), -C (keep comments)
	# and -w (suppress warnings). The input is $*$(FUFFIX) and the output is
	# redirected to $*$(SUFFIX); $* is make's automatic "stem" variable. Writing
	# "$$(FUFFIX)" instead would hand the shell a literal "$(FUFFIX)" and drop
	# the stem, so the "$*" prefix must be kept.
	# NOTE(review): FUFFIX and SUFFIX are not set in this file -- presumably the
	# makefile that includes this one defines them; confirm.
	CPP = gcc-13 -E -C -w $*$(FUFFIX) >$*$(SUFFIX) $(CPP_OPTIONS)

	# FC and FCL both use the mpif90 wrapper with OpenMP enabled.
	# NOTE(review): FCL is presumably the link command -- confirm against the
	# makefile that consumes it.
	FC = mpif90 -fopenmp
	FCL = mpif90 -fopenmp

	# Source-form flags: free-form input without a line-length limit.
	FREE = -ffree-form -ffree-line-length-none

	# -L hard-codes the Homebrew GCC 13.2.0 library directory; adjust the path
	# when a different GCC version is installed.
	FFLAGS = -w -ffpe-summary=invalid,zero,overflow -L /opt/homebrew/Cellar/gcc/13.2.0/lib/gcc/13

	# Optimization levels: -O2 by default (OFLAG_IN follows OFLAG), -O0 for DEBUG.
	OFLAG = -O2
	OFLAG_IN = $(OFLAG)
	DEBUG = -O0

	# FFT-related object lists. OBJECTS_O1 and OBJECTS_O2 use += and therefore
	# extend whatever value is already defined when this file is read.
	OBJECTS = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o
	OBJECTS_O1 += fftw3d.o fftmpi.o fftmpiw.o
	OBJECTS_O2 += fft3dlib.o

	# For what used to be vasp.5.lib
	# The library build reuses the main preprocessor, Fortran compiler and
	# source-form flags, with its own (lower) optimization levels.
	CPP_LIB = $(CPP)
	FC_LIB = $(FC)
	CC_LIB = gcc-13
	CFLAGS_LIB = -O
	FFLAGS_LIB = -O1
	FREE_LIB = $(FREE)

	OBJECTS_LIB = linpack_double.o getshmem.o

	# For the parser library
	CXX_PARS = g++-13
	LIBS += parser
	# Plain "=" (re)starts the LLIBS link line here; the later LLIBS lines in this
	# file append to it with +=.
	LLIBS = -Lparser -lparser -lstdc++
	# Install prefix of the QD library; ?= keeps a value that was already set
	# (e.g. from the environment or the make command line).
	QD ?= /opt/homebrew
	LLIBS += -L$(QD)/lib -lqdmod -lqd
	INCS += -I$(QD)/include/qd

	##
	## Customize as of this point! Of course you may change the preceding
	## part of this file as well if you like, but it should rarely be
	## necessary ...
	##

	# -march=native tunes the build for the machine it is compiled on. When
	# cross-compiling for another architecture, replace it with the relevant
	# target.
	FFLAGS += -march=native

	# For gcc-10 and higher (comment out for older versions)
	FFLAGS += -fallow-argument-mismatch

	# BLAS and LAPACK (mandatory)
	# NOTE(review): the default pins the openblas 0.3.26 Cellar directory, which
	# presumably stops existing once Homebrew upgrades openblas. OPENBLAS_ROOT is
	# only a ?= default, so override it if the path is missing.
	OPENBLAS_ROOT ?= /opt/homebrew/Cellar/openblas/0.3.26
	BLASPACK = -L$(OPENBLAS_ROOT)/lib -lopenblas

	# scaLAPACK (mandatory)
	SCALAPACK_ROOT ?= /opt/homebrew
	SCALAPACK = -L$(SCALAPACK_ROOT)/lib -lscalapack

	# Append scaLAPACK and then BLAS/LAPACK to the link line.
	LLIBS += $(SCALAPACK) $(BLASPACK)

	# FFTW (mandatory)
	# Links libfftw3 together with libfftw3_omp (matching the -fopenmp build).
	FFTW_ROOT ?= /opt/homebrew
	LLIBS += -L$(FFTW_ROOT)/lib -lfftw3 -lfftw3_omp
	INCS += -I$(FFTW_ROOT)/include

	# HDF5-support (optional but strongly recommended)
	#CPP_OPTIONS+= -DVASP_HDF5
	#HDF5_ROOT ?= /path/to/your/hdf5/installation
	#LLIBS += -L$(HDF5_ROOT)/lib -lhdf5_fortran
	#INCS += -I$(HDF5_ROOT)/include

	# For the VASP-2-Wannier90 interface (optional)
	#CPP_OPTIONS += -DVASP2WANNIER90
	#WANNIER90_ROOT ?= /path/to/your/wannier90/installation
	#LLIBS += -L$(WANNIER90_ROOT)/lib -lwannier

	# For the fftlib library (experimental)
	#CPP_OPTIONS+= -Dsysv
	#FCL += fftlib.o
	#CXX_FFTLIB = g++-13 -fopenmp -std=c++11 -DFFTLIB_THREADSAFE
	#INCS_FFTLIB = -I./include -I$(FFTW_ROOT)/include
	#LIBS += fftlib
	#LLIBS += -ldl
	"""This script grid-searches OMP_NUM_THREADS, NCORE and number of MPI processes for
	minimal VASP runtime on a simple Si2 relaxation.

	It writes the results to CSV and copies a
	markdown table to the clipboard. Requires Python 3.10. To keep a log, invoke with

	python vasp-perf-grid-search.py 2>&1 | tee Si-relax.log

	To install OpenMPI's mpiexec on macOS, use Homebrew:
	brew install open-mpi
	"""

	import os
	import warnings
	from itertools import product
	from time import perf_counter, sleep

	import pandas as pd
	from atomate2.vasp.jobs.core import RelaxMaker
	from atomate2.vasp.powerups import update_user_incar_settings
	from jobflow import run_locally
	from pandas.io.clipboard import clipboard_set
	from pymatgen.core import Structure

	# Silence every warning so pymatgen's output does not flood the benchmark log.
	warnings.filterwarnings("ignore")

	# VASP executable that each benchmark run launches through mpiexec.
	VASP_BIN = "/Users/janosh/dev/vasp/compiled/vasp_std_6.3.0_m1"
	# One (n_proc, n_threads, n_core, elapsed) row is collected per finished run.
	results: list[tuple[int, int, int, float]] = []

	# Two-atom FCC silicon cell used as the benchmark system.
	si_structure = Structure(
	    coords=[[0, 0, 0], [0.25, 0.25, 0.25]],
	    species=["Si"] * 2,
	    lattice=[[0, 2.73, 2.73], [2.73, 0, 2.73], [2.73, 2.73, 0]],
	)

	# Grid-search the number of MPI processes, OMP_NUM_THREADS and NCORE.
	#
	# The block nesting of the try / for / except structure is restored here;
	# with every statement at the same indentation level the code cannot be
	# parsed. The results are exported in the ``finally`` clause so that the
	# timings collected so far are kept not only on Ctrl+C but also when a run
	# fails: ``run_locally(..., ensure_success=True)`` raises on an unsuccessful
	# job. Any such exception still propagates after the export.
	try:
	    prod = list(product([1, 2, 4, 8], [1, 2], [2, 4]))
	    for idx, (n_proc, n_threads, n_core) in enumerate(prod, 1):
	        # Set before the job starts; the launched VASP processes presumably
	        # inherit it from this process's environment.
	        os.environ["OMP_NUM_THREADS"] = str(n_threads)

	        print(f"Run {idx} / {len(prod)}")

	        # make a relax job to optimize the structure
	        relax_job = RelaxMaker(
	            run_vasp_kwargs={"vasp_cmd": f"mpiexec -np {n_proc} {VASP_BIN}"},
	        ).make(si_structure)

	        relax_job = update_user_incar_settings(relax_job, {"NCORE": n_core})

	        # time only the execution of the job itself
	        start = perf_counter()
	        run_locally(relax_job, create_folders=True, ensure_success=True)
	        elapsed = perf_counter() - start

	        print(
	            f"run with {n_proc=}, {n_threads=}, {n_core=} took {elapsed:.1f} sec",
	        )
	        results.append((n_proc, n_threads, n_core, elapsed))

	        print("Waiting 10 secs to cooldown...\n\n", flush=True)
	        sleep(10)  # so every run is a bit more like the first

	except KeyboardInterrupt:  # exit gracefully on ctrl+c and write partial results
	    print("Job was interrupted")

	finally:
	    # Export whatever was measured: rounded values to CSV, markdown to clipboard.
	    df_perf = pd.DataFrame(
	        results, columns=["n_proc", "n_threads", "n_core", "elapsed"]
	    )
	    df_perf.round(2).to_csv("vasp-perf-results.csv")
	    clipboard_set(df_perf.to_markdown())