Created
January 12, 2024 09:14
-
-
Save cyb70289/86f7f600a110d69de99d4aff36c1818b to your computer and use it in GitHub Desktop.
bw-test.cc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Tested with g++-10.5; other versions are probably fine because
// the code is quite simple. Check the generated assembly to make sure.
// Build: g++ -std=c++11 -O3 -pthread -static bw-test.cc -o bw-test
// XXX: it takes about half a minute to compile this file
#include <cstdlib> | |
#include <iostream> | |
#include <thread> | |
#include <vector> | |
// 4M code size: 8000 distinct functions, each aligned to 512 bytes
// (8000 * 512 = 4,096,000 bytes of function slots).
constexpr int _n_funcs = 8000;
// 8M data size per thread: 8,000,000 bytes of int buffer.
constexpr int _data_size = 8 * 1000 * 1000;
// Number of ints processed by each function, i.e. 1/8000 of the buffer.
// sizeof yields size_t; the cast keeps the whole computation in int instead
// of relying on an implicit size_t -> int conversion.
constexpr int _ints_per_func =
    _data_size / (_n_funcs * static_cast<int>(sizeof(int)));
// The generated functions walk the buffer back to back, each advancing the
// cursor by _ints_per_func, so their slices must cover the buffer exactly.
static_assert(_ints_per_func > 0,
              "each function must process at least one int");
static_assert(_ints_per_func * _n_funcs * static_cast<int>(sizeof(int)) ==
                  _data_size,
              "_data_size must be a multiple of _n_funcs * sizeof(int)");
/*
 * F1(a, b, c) defines one benchmark function named f<a><b><c> together with
 * a dummy variable named _u<a><b><c>; the three arguments are pasted into
 * the identifiers with ##.
 *
 * Expansion of F1(00, 00, 00):
 *
 *   // the dummy variable is written by the function just to make sure all
 *   // functions are different and the compiler won't merge them into one
 *   volatile int _u000000;
 *
 *   __attribute__ ((noinline))       // keep every call a real call
 *   __attribute__ ((aligned(512)))   // start each function on a 512-byte boundary
 *   void f000000(volatile int** pibuf) {
 *     _u000000 = 1;
 *     volatile int* ibuf = *pibuf;
 *     for (int i = 0; i < _ints_per_func; ++i) {
 *       ++ibuf[i];
 *     }
 *     (*pibuf) += _ints_per_func;
 *   }
 *
 * pibuf is an in/out cursor: the function increments the _ints_per_func ints
 * starting at *pibuf and then advances *pibuf past them, so consecutive calls
 * process consecutive slices of the buffer.
 *
 * Every line of the macro except the last must end with a backslash as its
 * final character; anything after the backslash ends the definition early.
 */
#define F1(a, b, c) \
  volatile int _u ## a ## b ## c; \
  __attribute__ ((noinline)) \
  __attribute__ ((aligned(512))) \
  void f ## a ## b ## c(volatile int** pibuf) { \
    _u ## a ## b ## c = 1; \
    volatile int* ibuf = *pibuf; \
    for (int i = 0; i < _ints_per_func; ++i) { \
      ++ibuf[i]; \
    } \
    (*pibuf) += _ints_per_func; \
  }
// F20(a, b) defines 20 functions, f<a><b>00 ... f<a><b>19, by expanding
// F1(a, b, c) once for every two-digit suffix c = 00..19 in ascending order.
// Each continued line must end with the backslash as its last character.
#define F20(a, b) \
  F1(a, b, 00) F1(a, b, 01) F1(a, b, 02) F1(a, b, 03) F1(a, b, 04) \
  F1(a, b, 05) F1(a, b, 06) F1(a, b, 07) F1(a, b, 08) F1(a, b, 09) \
  F1(a, b, 10) F1(a, b, 11) F1(a, b, 12) F1(a, b, 13) F1(a, b, 14) \
  F1(a, b, 15) F1(a, b, 16) F1(a, b, 17) F1(a, b, 18) F1(a, b, 19)
// F400(a) defines 20 * 20 = 400 functions, f<a>0000 ... f<a>1919, by
// expanding F20(a, b) once for every two-digit middle part b = 00..19 in
// ascending order.
// Each continued line must end with the backslash as its last character.
#define F400(a) \
  F20(a, 00) F20(a, 01) F20(a, 02) F20(a, 03) F20(a, 04) \
  F20(a, 05) F20(a, 06) F20(a, 07) F20(a, 08) F20(a, 09) \
  F20(a, 10) F20(a, 11) F20(a, 12) F20(a, 13) F20(a, 14) \
  F20(a, 15) F20(a, 16) F20(a, 17) F20(a, 18) F20(a, 19)
// Define all 20 * 400 = 8000 functions: f000000, ..., f191919.
// The expansions are emitted in ascending order of the leading two digits.
F400(00) F400(01) F400(02) F400(03) F400(04)
F400(05) F400(06) F400(07) F400(08) F400(09)
F400(10) F400(11) F400(12) F400(13) F400(14)
F400(15) F400(16) F400(17) F400(18) F400(19)
// CALL_F1(a, b, c, pibuf) expands to a call of f<a><b><c>(pibuf), i.e. the
// function whose name is built by pasting the three parts together.
// The replacement list is a single expression with no trailing tokens, so the
// caller supplies the terminating semicolon.
#define CALL_F1(a, b, c, pibuf) f ## a ## b ## c(pibuf)
// CALL_F20(a, b, pibuf) calls the 20 functions f<a><b>00 ... f<a><b>19 in
// ascending order, passing pibuf to each.
// The calls are wrapped in do { } while (0) so the macro behaves as exactly
// one statement: `CALL_F20(...);` is safe in an unbraced if/else and leaves
// no empty statement behind.
#define CALL_F20(a, b, pibuf) \
  do { \
    CALL_F1(a, b, 00, pibuf); CALL_F1(a, b, 01, pibuf); \
    CALL_F1(a, b, 02, pibuf); CALL_F1(a, b, 03, pibuf); \
    CALL_F1(a, b, 04, pibuf); CALL_F1(a, b, 05, pibuf); \
    CALL_F1(a, b, 06, pibuf); CALL_F1(a, b, 07, pibuf); \
    CALL_F1(a, b, 08, pibuf); CALL_F1(a, b, 09, pibuf); \
    CALL_F1(a, b, 10, pibuf); CALL_F1(a, b, 11, pibuf); \
    CALL_F1(a, b, 12, pibuf); CALL_F1(a, b, 13, pibuf); \
    CALL_F1(a, b, 14, pibuf); CALL_F1(a, b, 15, pibuf); \
    CALL_F1(a, b, 16, pibuf); CALL_F1(a, b, 17, pibuf); \
    CALL_F1(a, b, 18, pibuf); CALL_F1(a, b, 19, pibuf); \
  } while (0)
// CALL_F400(a, pibuf) calls the 20 * 20 = 400 functions f<a>0000 ...
// f<a>1919 by expanding CALL_F20(a, b, pibuf) for b = 00..19 in ascending
// order.
// Wrapped in do { } while (0) so the whole expansion is a single statement
// that is safe to use in an unbraced if/else.
#define CALL_F400(a, pibuf) \
  do { \
    CALL_F20(a, 00, pibuf); CALL_F20(a, 01, pibuf); \
    CALL_F20(a, 02, pibuf); CALL_F20(a, 03, pibuf); \
    CALL_F20(a, 04, pibuf); CALL_F20(a, 05, pibuf); \
    CALL_F20(a, 06, pibuf); CALL_F20(a, 07, pibuf); \
    CALL_F20(a, 08, pibuf); CALL_F20(a, 09, pibuf); \
    CALL_F20(a, 10, pibuf); CALL_F20(a, 11, pibuf); \
    CALL_F20(a, 12, pibuf); CALL_F20(a, 13, pibuf); \
    CALL_F20(a, 14, pibuf); CALL_F20(a, 15, pibuf); \
    CALL_F20(a, 16, pibuf); CALL_F20(a, 17, pibuf); \
    CALL_F20(a, 18, pibuf); CALL_F20(a, 19, pibuf); \
  } while (0)
// CALL_F8000(pibuf) calls all 20 * 400 = 8000 functions, f000000 ...
// f191919, by expanding CALL_F400(a, pibuf) for a = 00..19 in ascending
// order.
// Wrapped in do { } while (0) so the whole expansion is a single statement
// that is safe to use in an unbraced if/else.
#define CALL_F8000(pibuf) \
  do { \
    CALL_F400(00, pibuf); CALL_F400(01, pibuf); \
    CALL_F400(02, pibuf); CALL_F400(03, pibuf); \
    CALL_F400(04, pibuf); CALL_F400(05, pibuf); \
    CALL_F400(06, pibuf); CALL_F400(07, pibuf); \
    CALL_F400(08, pibuf); CALL_F400(09, pibuf); \
    CALL_F400(10, pibuf); CALL_F400(11, pibuf); \
    CALL_F400(12, pibuf); CALL_F400(13, pibuf); \
    CALL_F400(14, pibuf); CALL_F400(15, pibuf); \
    CALL_F400(16, pibuf); CALL_F400(17, pibuf); \
    CALL_F400(18, pibuf); CALL_F400(19, pibuf); \
  } while (0)
static void bw_thread() { | |
// initilize the buffer to commit memory | |
volatile int* ibuf_save = new int[_data_size/sizeof(int)]{}; | |
// never returns | |
volatile bool _run = true; | |
while (_run) { | |
volatile int* ibuf = ibuf_save; | |
CALL_F8000(&ibuf); | |
} | |
} | |
// Entry point.
//
// Usage: <program> number_of_threads
//
// Parses the thread count n from argv[1] and runs bw_thread on n threads in
// total: n - 1 std::thread workers plus the calling thread. Prints a usage
// message to stderr and returns 1 when the argument is missing or does not
// parse to a positive number. Otherwise the program is expected to run until
// it is interrupted (ctrl-c).
int main(int argc, char *argv[]) {
  int n = -1;
  if (argc >= 2) {
    // NOTE(review): std::atoi reports no errors; trailing garbage is ignored
    // and non-numeric input yields 0, which the check below rejects.
    n = std::atoi(argv[1]);
  }
  if (n <= 0) {
    std::cerr << "usage: " << argv[0] << " number_of_threads\n";
    std::cerr << "e.g.: " << argv[0] << " 64\n";
    return 1;
  }
  std::cout << "threads = " << n << '\n';
  // Flush explicitly: the workers loop forever, so when stdout is not a
  // terminal the banner would otherwise stay in the buffer and be lost on
  // ctrl-c.
  std::cout << "ctrl-c to exit\n" << std::flush;
  std::vector<std::thread> tv;
  for (int i = 0; i < n - 1; ++i) {
    // Construct the thread in place instead of moving a temporary in.
    tv.emplace_back(bw_thread);
  }
  // The calling thread is the n-th worker.
  bw_thread();
  // Not reached while bw_thread loops forever. Joining keeps this path well
  // defined if that ever changes: destroying a joinable std::thread calls
  // std::terminate.
  for (std::thread& worker : tv) {
    worker.join();
  }
  return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment