-
-
Save bryan-lunt/bf34992d46af04b11b3f3a5adcb9b3e8 to your computer and use it in GitHub Desktop.
google/benchmark example with MPI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# for gist naming purposes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Distributed under the MIT license
cmake_minimum_required(VERSION 3.5)
project("example")

# Let find_package(GBenchmark) pick up a FindGBenchmark.cmake module placed in
# the project source directory.
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})

# Both dependencies are mandatory for the `example` target, so request them as
# REQUIRED instead of silently continuing to a confusing compile/link error.
find_package(MPI REQUIRED)
find_package(GBenchmark REQUIRED)

add_executable(example main.cc)
target_compile_features(example PRIVATE cxx_auto_type)

# main.cc is C++, so use the per-language MPI_CXX_* variables consistently
# (rather than mixing MPI_C_INCLUDE_PATH with the legacy MPI_LIBRARIES), and
# scope the include directories to the target that needs them.
if(MPI_CXX_INCLUDE_PATH)
  target_include_directories(example PRIVATE ${MPI_CXX_INCLUDE_PATH})
endif()
target_link_libraries(example PRIVATE ${MPI_CXX_LIBRARIES} gbenchmark)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Distributed under the MIT license
# Defines the following variables
#
# - GBENCHMARK_FOUND if the library is found
# - GBENCHMARK_LIBRARY is the path to the library
# - GBENCHMARK_INCLUDE_DIR is the path to the include directory
#
# and, when the library is found, the GLOBAL imported target `gbenchmark`.

# The imported target is GLOBAL whereas the *_FOUND variables are ordinary
# (directory-scoped) variables. Guard on the target itself so that a second
# find_package(GBenchmark) issued from another directory neither re-creates
# the target (an error) nor reports the package as missing.
if(TARGET gbenchmark)
  set(GBenchmark_FOUND TRUE)
  set(GBENCHMARK_FOUND TRUE)
  return()
endif()

find_library(GBENCHMARK_LIBRARY benchmark DOC "Path to the google benchmark library")
find_path(
  GBENCHMARK_INCLUDE_DIR benchmark/benchmark.h
  DOC "Path to google benchmark include directory"
  # NOTE(review): `casapath` is not set anywhere in this module; it is kept
  # only as an optional extra search location for callers that define it.
  PATHS "${casapath}"
)
mark_as_advanced(GBENCHMARK_LIBRARY GBENCHMARK_INCLUDE_DIR)

include(FindPackageHandleStandardArgs)
# The first argument must match the name given to find_package() (GBenchmark);
# otherwise the REQUIRED and QUIET options of the caller are not honoured.
find_package_handle_standard_args(
  GBenchmark
  REQUIRED_VARS GBENCHMARK_LIBRARY GBENCHMARK_INCLUDE_DIR
)
# Keep the documented upper-case result variable available to callers.
set(GBENCHMARK_FOUND "${GBenchmark_FOUND}")

if(GBENCHMARK_FOUND)
  # Pick the imported library type from the file extension of the library.
  if(GBENCHMARK_LIBRARY MATCHES "\\.a$")
    add_library(gbenchmark STATIC IMPORTED GLOBAL)
  else()
    add_library(gbenchmark SHARED IMPORTED GLOBAL)
  endif()
  set_target_properties(gbenchmark PROPERTIES
    IMPORTED_LOCATION "${GBENCHMARK_LIBRARY}"
    INTERFACE_INCLUDE_DIRECTORIES "${GBENCHMARK_INCLUDE_DIR}")
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Distributed under the MIT license | |
// The following demonstrates how to use google/benchmark with MPI enabled | |
// codes. There are three core aspects: | |
// | |
// 1. The time it takes to run a single iteration of a particular benchmark is | |
// the maximum time across all processes. In a two process job, if process 0 | |
// takes 1 second to do its work, and process 1 takes 1.5 seconds, then 1.5 | |
// seconds is the time recorded for benchmarking purposes. | |
// 2. Only the root process is allowed to report. Other processes report via a | |
// "NullReporter". | |
// 3. The main is modified to initialize and finalize MPI. It is also modified | |
// for the purpose of point 2 above. | |
// | |
// Note: The efficiency of MPI algorithms often depends on interleaving compute
// and communication. Depending on how it is applied, benchmarking, and
// especially micro-benchmarking, may not capture this aspect.
// | |
#include <mpi.h> | |
#include <benchmark/benchmark.h> | |
#include <chrono> | |
#include <thread> | |
namespace { | |
// Workload under test: each process idles for `macsleepface` milliseconds and
// then waits at a barrier on MPI_COMM_WORLD before returning.
void i_am_sleepy(int macsleepface) {
  const std::chrono::milliseconds nap(macsleepface);
  std::this_thread::sleep_for(nap);  // stand-in for real per-process work
  MPI_Barrier(MPI_COMM_WORLD);       // rendezvous with the other processes
}
// Benchmark body for google/benchmark's manual-time mode.
//
// Each iteration times i_am_sleepy() locally, reduces the local durations to
// their maximum over MPI_COMM_WORLD, and reports that maximum (in seconds) as
// the iteration time, so the slowest process determines the recorded time.
//
// state: the benchmark state handed in by the google/benchmark runner.
void mpi_benchmark(benchmark::State &state) {
  // Intervals are measured with steady_clock: it is guaranteed monotonic,
  // whereas high_resolution_clock may be an alias of the adjustable
  // system_clock.
  using clock = std::chrono::steady_clock;

  int rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  while (state.KeepRunning()) {
    // Do the work and time it on this process.
    const auto start = clock::now();
    i_am_sleepy(rank % 5);
    const auto end = clock::now();

    // Local duration in (fractional) seconds. Left non-const because older
    // MPI headers declare the send buffer as a non-const pointer.
    double elapsed_seconds = std::chrono::duration<double>(end - start).count();

    // Take the maximum across all processes: for better or for worse, the
    // slowest process is the one holding back the others.
    double max_elapsed_seconds = 0.0;
    MPI_Allreduce(&elapsed_seconds, &max_elapsed_seconds, 1, MPI_DOUBLE, MPI_MAX,
                  MPI_COMM_WORLD);
    state.SetIterationTime(max_elapsed_seconds);
  }
}
} | |
// Register the benchmark in manual-time mode: the reported time is the value
// handed to SetIterationTime() inside mpi_benchmark.
BENCHMARK(mpi_benchmark)
    ->UseManualTime();
// This reporter does nothing. | |
// We can use it to disable output from all but the root process | |
class NullReporter : public ::benchmark::BenchmarkReporter { | |
public: | |
NullReporter() {} | |
virtual bool ReportContext(const Context &) {return true;} | |
virtual void ReportRuns(const std::vector<Run> &) {} | |
virtual void Finalize() {} | |
}; | |
// The main is rewritten to allow for MPI initializing and for selecting a | |
// reporter according to the process rank | |
int main(int argc, char **argv) { | |
MPI_Init(&argc, &argv); | |
int rank; | |
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
::benchmark::Initialize(&argc, argv); | |
if(rank == 0) | |
// root process will use a reporter from the usual set provided by | |
// ::benchmark | |
::benchmark::RunSpecifiedBenchmarks(); | |
else { | |
// reporting from other processes is disabled by passing a custom reporter | |
NullReporter null; | |
::benchmark::RunSpecifiedBenchmarks(&null); | |
} | |
MPI_Finalize(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment