Skip to content

Instantly share code, notes, and snippets.

@cyb70289
Created January 12, 2024 09:14
Show Gist options
  • Save cyb70289/86f7f600a110d69de99d4aff36c1818b to your computer and use it in GitHub Desktop.
Save cyb70289/86f7f600a110d69de99d4aff36c1818b to your computer and use it in GitHub Desktop.
bw-test.cc
// Tested with g++ 10.5; other versions are probably fine because
// the code is quite simple. Check the generated assembly to make sure.
// g++ -std=c++11 -O3 -pthread -static bw-test.cc -o bw-test
// XXX: this file takes about half a minute to compile
#include <cerrno>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <thread>
#include <vector>
// Number of distinct generated functions (20 * 20 * 20 = 8000). Each one is
// aligned to 512 bytes, which gives about 4M of code in total.
constexpr int _n_funcs = 20 * 20 * 20;
// Bytes of data each thread works on (8M).
constexpr int _data_size = 8000000;
// How many ints a single function processes: the buffer holds
// _data_size / sizeof(int) ints, shared equally among the _n_funcs functions.
constexpr int _ints_per_func =
    static_cast<int>(_data_size / sizeof(int) / _n_funcs);
/*
 * F1(hi, mid, lo) defines one global variable and one function whose names
 * end in the pasted suffix <hi><mid><lo>. F1(00, 00, 00) expands to:
 *
 *   volatile int _u000000;
 *
 *   __attribute__((noinline, aligned(512)))
 *   void f000000(volatile int** pibuf) {
 *     _u000000 = 1;
 *     volatile int* chunk = *pibuf;
 *     for (int k = 0; k < _ints_per_func; ++k) {
 *       ++chunk[k];
 *     }
 *     *pibuf += _ints_per_func;
 *   }
 *
 * The store to the per-function dummy variable makes every function
 * different, so the compiler won't merge them into one function.
 * Each call increments _ints_per_func ints starting at *pibuf and then
 * advances *pibuf past them.
 */
#define F1(hi, mid, lo) \
  volatile int _u ## hi ## mid ## lo; \
  __attribute__((noinline, aligned(512))) \
  void f ## hi ## mid ## lo(volatile int** pibuf) { \
    _u ## hi ## mid ## lo = 1; \
    volatile int* chunk = *pibuf; \
    for (int k = 0; k < _ints_per_func; ++k) { \
      ++chunk[k]; \
    } \
    *pibuf += _ints_per_func; \
  }
// F20(hi, mid): define the 20 functions f<hi><mid>00 .. f<hi><mid>19 by
// invoking F1 once per two-digit suffix, in ascending order.
#define F20(hi, mid) \
  F1(hi, mid, 00) F1(hi, mid, 01) F1(hi, mid, 02) F1(hi, mid, 03) \
  F1(hi, mid, 04) F1(hi, mid, 05) F1(hi, mid, 06) F1(hi, mid, 07) \
  F1(hi, mid, 08) F1(hi, mid, 09) F1(hi, mid, 10) F1(hi, mid, 11) \
  F1(hi, mid, 12) F1(hi, mid, 13) F1(hi, mid, 14) F1(hi, mid, 15) \
  F1(hi, mid, 16) F1(hi, mid, 17) F1(hi, mid, 18) F1(hi, mid, 19)
// F400(hi): define the 20 * 20 = 400 functions f<hi>0000 .. f<hi>1919 by
// invoking F20 once per two-digit middle index, in ascending order.
#define F400(hi) \
  F20(hi, 00) F20(hi, 01) F20(hi, 02) F20(hi, 03) \
  F20(hi, 04) F20(hi, 05) F20(hi, 06) F20(hi, 07) \
  F20(hi, 08) F20(hi, 09) F20(hi, 10) F20(hi, 11) \
  F20(hi, 12) F20(hi, 13) F20(hi, 14) F20(hi, 15) \
  F20(hi, 16) F20(hi, 17) F20(hi, 18) F20(hi, 19)
// Instantiate all 20 * 400 = 8000 functions, f000000 through f191919.
// The outer index runs 00..19 in ascending order, one F400 group each.
F400(00) F400(01) F400(02) F400(03) F400(04)
F400(05) F400(06) F400(07) F400(08) F400(09)
F400(10) F400(11) F400(12) F400(13) F400(14)
F400(15) F400(16) F400(17) F400(18) F400(19)
// Call the single generated function f<hi><mid><lo>, handing it pibuf.
#define CALL_F1(hi, mid, lo, pibuf) f ## hi ## mid ## lo(pibuf)
// Call the 20 functions f<a><b>00 .. f<a><b>19 in ascending order, passing
// pibuf to each one.
// The body is wrapped in do { ... } while (0) and carries no trailing
// semicolon, so `CALL_F20(a, b, p);` is exactly one statement and stays
// correct inside an unbraced if/else.
#define CALL_F20(a, b, pibuf) \
  do { \
    CALL_F1(a, b, 00, pibuf); \
    CALL_F1(a, b, 01, pibuf); \
    CALL_F1(a, b, 02, pibuf); \
    CALL_F1(a, b, 03, pibuf); \
    CALL_F1(a, b, 04, pibuf); \
    CALL_F1(a, b, 05, pibuf); \
    CALL_F1(a, b, 06, pibuf); \
    CALL_F1(a, b, 07, pibuf); \
    CALL_F1(a, b, 08, pibuf); \
    CALL_F1(a, b, 09, pibuf); \
    CALL_F1(a, b, 10, pibuf); \
    CALL_F1(a, b, 11, pibuf); \
    CALL_F1(a, b, 12, pibuf); \
    CALL_F1(a, b, 13, pibuf); \
    CALL_F1(a, b, 14, pibuf); \
    CALL_F1(a, b, 15, pibuf); \
    CALL_F1(a, b, 16, pibuf); \
    CALL_F1(a, b, 17, pibuf); \
    CALL_F1(a, b, 18, pibuf); \
    CALL_F1(a, b, 19, pibuf); \
  } while (0)
// Call the 20 * 20 = 400 functions f<a>0000 .. f<a>1919 by running CALL_F20
// for each middle index 00..19 in ascending order, passing pibuf through.
// The body is wrapped in do { ... } while (0) and carries no trailing
// semicolon, so `CALL_F400(a, p);` is exactly one statement and stays
// correct inside an unbraced if/else.
#define CALL_F400(a, pibuf) \
  do { \
    CALL_F20(a, 00, pibuf); \
    CALL_F20(a, 01, pibuf); \
    CALL_F20(a, 02, pibuf); \
    CALL_F20(a, 03, pibuf); \
    CALL_F20(a, 04, pibuf); \
    CALL_F20(a, 05, pibuf); \
    CALL_F20(a, 06, pibuf); \
    CALL_F20(a, 07, pibuf); \
    CALL_F20(a, 08, pibuf); \
    CALL_F20(a, 09, pibuf); \
    CALL_F20(a, 10, pibuf); \
    CALL_F20(a, 11, pibuf); \
    CALL_F20(a, 12, pibuf); \
    CALL_F20(a, 13, pibuf); \
    CALL_F20(a, 14, pibuf); \
    CALL_F20(a, 15, pibuf); \
    CALL_F20(a, 16, pibuf); \
    CALL_F20(a, 17, pibuf); \
    CALL_F20(a, 18, pibuf); \
    CALL_F20(a, 19, pibuf); \
  } while (0)
// Call all 20 * 400 = 8000 generated functions by running CALL_F400 for each
// outer index 00..19 in ascending order, passing pibuf through.
// The body is wrapped in do { ... } while (0) and carries no trailing
// semicolon, so `CALL_F8000(p);` is exactly one statement and stays correct
// inside an unbraced if/else.
#define CALL_F8000(pibuf) \
  do { \
    CALL_F400(00, pibuf); \
    CALL_F400(01, pibuf); \
    CALL_F400(02, pibuf); \
    CALL_F400(03, pibuf); \
    CALL_F400(04, pibuf); \
    CALL_F400(05, pibuf); \
    CALL_F400(06, pibuf); \
    CALL_F400(07, pibuf); \
    CALL_F400(08, pibuf); \
    CALL_F400(09, pibuf); \
    CALL_F400(10, pibuf); \
    CALL_F400(11, pibuf); \
    CALL_F400(12, pibuf); \
    CALL_F400(13, pibuf); \
    CALL_F400(14, pibuf); \
    CALL_F400(15, pibuf); \
    CALL_F400(16, pibuf); \
    CALL_F400(17, pibuf); \
    CALL_F400(18, pibuf); \
    CALL_F400(19, pibuf); \
  } while (0)
// Per-thread worker: allocates its own buffer of _data_size bytes and then
// runs the full chain of generated functions over it again and again.
// This function does not return.
static void bw_thread() {
  // The trailing {} zero-fills the array; the intent is to touch the whole
  // buffer up front so that the memory is committed.
  constexpr auto n_ints = _data_size / sizeof(int);
  volatile int* const base = new int[n_ints]{};
  // keep_going is never cleared, so this loop runs until the process is
  // killed.
  for (volatile bool keep_going = true; keep_going;) {
    // Every pass restarts at the beginning of the buffer; each called
    // function advances the cursor past the ints it processed.
    volatile int* cursor = base;
    CALL_F8000(&cursor);
  }
}
// Entry point. Usage: <program> number_of_threads
//
// Runs bw_thread() on number_of_threads threads in total: n - 1 spawned
// std::threads plus the calling thread itself. bw_thread() never returns,
// so the program runs until it is interrupted (ctrl-c).
//
// Returns 1 after printing a usage message when the argument is missing,
// is not a whole decimal number, is out of int range, or is not positive.
int main(int argc, char *argv[]) {
  // argv[0] can be null when argc == 0; streaming a null char* is undefined.
  const char* prog = (argc > 0 && argv[0] != nullptr) ? argv[0] : "bw-test";
  int n = -1;
  if (argc >= 2) {
    // strtol rather than atoi: atoi has undefined behaviour for values that
    // do not fit in an int and silently accepts trailing junk such as "4x".
    char* end = nullptr;
    errno = 0;
    const long parsed = std::strtol(argv[1], &end, 10);
    const bool whole_arg = (end != argv[1]) && (*end == '\0');
    if (whole_arg && errno != ERANGE && parsed > 0 &&
        parsed <= std::numeric_limits<int>::max()) {
      n = static_cast<int>(parsed);
    }
  }
  if (n <= 0) {
    std::cerr << "usage: " << prog << " number_of_threads\n";
    std::cerr << "e.g.: " << prog << " 64\n";
    return 1;
  }
  std::cout << "threads = " << n << '\n';
  std::cout << "ctrl-c to exit\n";
  // The spawned threads are never joined: the call below does not return,
  // so this vector is never destroyed while they are running.
  std::vector<std::thread> tv;
  for (int i = 0; i < n - 1; ++i) {
    tv.emplace_back(bw_thread);
  }
  // The calling thread is the n-th worker.
  bw_thread();
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment