Last active
January 16, 2021 23:19
-
-
Save AlbertoEAF/5972db15a27c294bab65b97e1bc4c315 to your computer and use it in GitHub Desktop.
LightGBM single row predict benchmark script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 *
 * Quick & dirty Single Row Predict benchmark.
 *
 *
 * Add this .cpp to a new "profiling/" folder and the following lines to the end of CMakeLists.txt:
 *
 *   OPTION(BUILD_PROFILING_TESTS "Set to ON to compile profiling executables for development and benchmarks." OFF)
 *   if(BUILD_PROFILING_TESTS)
 *     # For profiling builds with valgrind/callgrind use -DUSE_DEBUG=ON
 *     add_executable(lightgbm_profile_single_row_predict profiling/profile_single_row_predict.cpp ${SOURCES})
 *   endif(BUILD_PROFILING_TESTS)
 *
 *
 * Requirements:
 *
 *  - Add a "LightGBM_model.txt" file at the repo root.
 *  - Adapt ``values`` below to your model to have at least 2 different input rows.
 *
 * Compilation:
 *
 *   cmake .. -DBUILD_PROFILING_TESTS=ON && make -j4
 *
 * Usage:
 *
 *   time ./lightgbm_profile_single_row_predict <# threads> <# points> [f]  # f uses the Fast single row prediction
 *
 *
 * Alberto Ferreira, 2021
 */
#include <iostream> | |
#include <stdio.h> | |
#include <math.h> | |
#include <vector> | |
#include <thread> | |
#include <ctime> | |
#include <cstring> | |
#include "LightGBM/c_api.h" | |
using namespace std; | |
#define FMT_HEADER_ONLY | |
#include "LightGBM/../../external_libs/fmt/include/fmt/format.h" | |
/**
 * Scores a single row with LGBM_BoosterPredictForMatSingleRow.
 *
 * The row is passed as C_API_DTYPE_FLOAT64, row-major, with
 * C_API_PREDICT_NORMAL, starting at iteration 0.
 *
 * @param handle          Booster to predict with.
 * @param data            Pointer to the row's feature values.
 * @param ncol            Number of columns in the row.
 * @param num_iterations  Iteration count forwarded to the C API.
 * @param out_len         Forwarded to the C API as its out_len argument.
 * @param out_result      Forwarded to the C API as its out_result argument.
 * @throws std::exception when the C API call returns a non-zero status.
 */
inline void predict(BoosterHandle handle,
                    const void* data,
                    int32_t ncol,
                    int num_iterations,
                    int64_t* out_len,
                    double* out_result) {
  const int status = LGBM_BoosterPredictForMatSingleRow(
      handle, data, C_API_DTYPE_FLOAT64, ncol,
      /*is_row_major=*/1,
      C_API_PREDICT_NORMAL,
      /*start_iteration=*/0,
      num_iterations,
      "",
      out_len, out_result);

  if (status == 0) {
    return;
  }
  throw std::exception();
}
/**
 * Runs predict() for every index in [start, end).
 *
 * Index i reads input row (i % nrows) of ``data`` and writes its score to
 * out_scores[i], so the input rows are reused cyclically.
 *
 * @param boosterHandle   Booster forwarded to predict().
 * @param data            Input values, ncol per row.
 * @param nrows           Number of input rows available in ``data``.
 * @param ncol            Number of columns per row.
 * @param num_iterations  Iteration count forwarded to predict().
 * @param out_len         Forwarded unchanged to every predict() call.
 * @param out_scores      Output array, indexed by the global index i.
 * @param start           First index to process (inclusive).
 * @param end             Last index to process (exclusive).
 */
void predict_n(
    BoosterHandle boosterHandle,
    double *data,
    const size_t nrows,
    int ncol,
    int num_iterations,
    int64_t *out_len,
    double* out_scores,
    const size_t start,
    const size_t end) {
  size_t idx = start;
  while (idx < end) {
    const double* row_ptr = data + (idx % nrows) * ncol;
    double* score_slot = out_scores + idx;
    predict(boosterHandle, row_ptr, ncol, num_iterations, out_len, score_slot);
    ++idx;
  }
}
/**
 * Scores a single row with LGBM_BoosterPredictForMatSingleRowFast.
 *
 * @param handle      Fast-config handle forwarded to the C API.
 * @param data        Pointer to the row's feature values.
 * @param out_len     Forwarded to the C API as its out_len argument.
 * @param out_result  Forwarded to the C API as its out_result argument.
 * @throws std::exception when the C API call returns a non-zero status.
 */
inline void predict_fast(FastConfigHandle handle,
                         const void* data,
                         int64_t* out_len,
                         double* out_result) {
  const int status =
      LGBM_BoosterPredictForMatSingleRowFast(handle, data, out_len, out_result);

  if (status == 0) {
    return;
  }
  throw std::exception();
}
/**
 * Runs predict_fast() for every index in [start, end).
 *
 * Index i reads input row (i % nrows) of ``data`` and writes its score to
 * out_scores[i], so the input rows are reused cyclically.
 *
 * @param handle      Fast-config handle forwarded to predict_fast().
 * @param data        Input values, ncol per row.
 * @param nrows       Number of input rows available in ``data``.
 * @param ncol        Number of columns per row.
 * @param out_len     Forwarded unchanged to every predict_fast() call.
 * @param out_scores  Output array, indexed by the global index i.
 * @param start       First index to process (inclusive).
 * @param end         Last index to process (exclusive).
 */
void predict_fast_n(
    FastConfigHandle handle,
    double *data,
    const size_t nrows,
    const size_t ncol,
    int64_t *out_len,
    double* out_scores,
    const size_t start,
    const size_t end) {
  size_t idx = start;
  while (idx < end) {
    const double* row_ptr = data + (idx % nrows) * ncol;
    double* score_slot = out_scores + idx;
    predict_fast(handle, row_ptr, out_len, score_slot);
    ++idx;
  }
}
int main(int argc, char **argv) { | |
// Input parsing & experiment setup: | |
if (argc < 2) { | |
// argv[1] = #threads | |
// argv[2] == "f" ? => Use Fast variant. | |
cout << "Please pass #threads!\n"; | |
exit(1); | |
} | |
const int nthreads = std::atoi(argv[1]); | |
const size_t N_PREDICTIONS = size_t(std::atol(argv[2])); | |
bool fast_mode = strcmp(argv[3], "f") == 0; | |
cout << "fast_mode=" << fast_mode << "\n"; | |
cout << "start\n"; | |
BoosterHandle boosterHandle; | |
int num_iterations; | |
LGBM_BoosterCreateFromModelfile("./LightGBM_model.txt", &num_iterations, &boosterHandle); | |
cout << "Model iterations " << num_iterations<< "\n"; | |
/* | |
Dataset: | |
feature_names=amount num1_float num2_double num3_int | |
fraud := 400<amount<700 & cat1_string="C"~=2 & num1_float < 70 | |
Use input "rows" that provide different output scores to ensure thread-safety: | |
*/ | |
double values[] = { | |
0.25, 1.4, 0.12, -0.5, | |
500, 2, 9999, 200, | |
}; | |
const size_t NROWS=2; | |
const int NUM_FEATURES = 4; | |
double ref_scores[NUM_FEATURES * NROWS]; | |
int64_t dummy_out_len; | |
std::vector<double> scores(N_PREDICTIONS); | |
FastConfigHandle fastConfigHandle; | |
LGBM_BoosterPredictForMatSingleRowFastInit(boosterHandle, C_API_PREDICT_NORMAL, 0, num_iterations, C_API_DTYPE_FLOAT64, NUM_FEATURES, "", &fastConfigHandle); | |
// Generate 2 distinct reference scores - 1 per input row: | |
predict(boosterHandle, values, NUM_FEATURES, num_iterations, &dummy_out_len, &ref_scores[0]); | |
predict(boosterHandle, values+NUM_FEATURES, NUM_FEATURES, num_iterations, &dummy_out_len, &ref_scores[1]); | |
fmt::print("Ref scores: {:.6g}, {:.6g}\n", ref_scores[0], ref_scores[1]); | |
// Schedule work //////////////////////////////////////////////////////////////////////////////////////////// | |
const size_t full_span = scores.size(); | |
const size_t base_thread_span = full_span / nthreads; | |
fmt::print("Work span={}, {} threads, items/thread ~= {}\n", full_span, nthreads, base_thread_span); | |
auto t0 = std::clock(); | |
std::vector<std::thread> threads; | |
for (int nthread = 0; nthread < nthreads; ++nthread) { | |
const size_t start = nthread * base_thread_span; | |
const size_t end = nthread < nthreads-1 ? start + base_thread_span : full_span; | |
fmt::print("Thread {} [{}:{}] ({} items)\n", nthread, start, end, end-start); | |
if (fast_mode) { | |
threads.push_back(std::thread(&predict_fast_n, fastConfigHandle, values, NROWS, NUM_FEATURES, &dummy_out_len, scores.data(), start, end)); | |
} else { | |
threads.push_back(std::thread(&predict_n, boosterHandle, values, NROWS, NUM_FEATURES, num_iterations, &dummy_out_len, scores.data(), start, end)); | |
} | |
} | |
for (auto &th: threads) | |
th.join(); | |
// Check output scores against reference scores ///////////////////////////////////////////////////////////// | |
for (size_t i = 0; i < N_PREDICTIONS; ++i) { | |
const size_t row = i%2; | |
const double error = scores[i]-ref_scores[row]; | |
if (abs(error) > 1e-30) { | |
fmt::print("{} Score {} ref_score {}\n", i, scores[i], ref_scores[row]); | |
fmt::print("{} Score error: {}\n", i, error); | |
} | |
} | |
cout << "len=" << dummy_out_len << endl; | |
cout << "end\n"; | |
auto t_exec = double(clock() - t0) / CLOCKS_PER_SEC; | |
cout << "Executed in " << t_exec << "s\n"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment