Last active
November 18, 2024 12:10
-
-
Save silahian/8005a4cc01245dfb7742837c742bb053 to your computer and use it in GitHub Desktop.
Pre allocated vs dynamic arrays performance for low latency / high frequency trading systems
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example program | |
#include <iostream> | |
#include <string> | |
#include <vector> | |
#include <algorithm> | |
#include <time.h> | |
// *********************************** | |
// This is for measuring CPU clocks | |
#if defined(__i386__) | |
static __inline__ unsigned long long rdtsc(void) | |
{ | |
unsigned long long int x; | |
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); | |
return x; | |
} | |
#elif defined(__x86_64__) | |
static __inline__ unsigned long long rdtsc(void) | |
{ | |
unsigned hi, lo; | |
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); | |
return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); | |
} | |
#endif | |
// *********************************** | |
using namespace std; | |
struct BookPrice | |
{ | |
int size; | |
double price; | |
}; | |
vector<BookPrice> aDynamic; | |
vector<BookPrice> aPreallocated(10); | |
void AddRemoveDynamically(int type, BookPrice& b); | |
void AddRemovePre(int type, BookPrice& b); | |
int totAddsDyn=0, totDelsDyn=0; | |
int totAddsFix=0, totDelsFix=0; | |
int main() | |
{ | |
int sampleRun = 1000000; | |
double sumRunDyn = 0; | |
double sumRunFix = 0; | |
for(int i=0; i<sampleRun; i++) | |
{ | |
BookPrice b; | |
b.price=rand() % 10 + 1; //random 1 to 10 | |
int type = (rand() % 2) + 1; //random 1 to 2 | |
auto ini = rdtsc(); | |
AddRemoveDynamically(type, b); | |
auto end = rdtsc(); | |
sumRunDyn += end-ini; | |
ini = rdtsc(); | |
AddRemovePre(type, b); | |
end = rdtsc(); | |
sumRunFix += end-ini; | |
} | |
cout << "Dynamic Took: "<< sumRunDyn/sampleRun << endl; | |
cout << "Pre-allocated Took: "<< sumRunFix/sampleRun << endl; | |
cout << "Difference: % " << 100 * ((sumRunDyn/sampleRun) - (sumRunFix/sampleRun)) / (sumRunFix/sampleRun) << endl; | |
} | |
/* | |
ADD type=1 | |
DELETE type=2 | |
*/ | |
void AddRemoveDynamically(int type, BookPrice& b) | |
{ | |
if (type == 1) | |
{ | |
totAddsDyn++; | |
//add element | |
aDynamic.push_back(b); | |
} | |
else if (type == 2) | |
{ | |
totDelsDyn++; | |
aDynamic.erase(std::remove_if(aDynamic.begin(), aDynamic.end(), [&](BookPrice const & p) {return p.price == b.price;}),aDynamic.end()); | |
} | |
std::sort(aDynamic.begin(), aDynamic.end(), [&](BookPrice& ltd, BookPrice& rtd){ return ltd.price < rtd.price; }); | |
} | |
void AddRemovePre(int type, BookPrice& b) | |
{ | |
if (type == 1) | |
{ | |
totAddsFix++; | |
//always rewrite first item, since newer | |
aPreallocated[0] = b; | |
} | |
else if (type == 2) | |
{ | |
totDelsFix++; | |
auto it= find_if(aPreallocated.begin(), aPreallocated.end(), [&](BookPrice const & p) {return p.price == b.price;}); | |
if (it != aPreallocated.end()) | |
{ | |
it->price=0; | |
it->size=0; | |
} | |
} | |
std::sort(aPreallocated.begin(), aPreallocated.end(), [&](BookPrice& ltd, BookPrice& rtd){ return ltd.price < rtd.price; }); | |
} | |
This is the result taking CPU cycles:
Dynamic Took: 2522.17
Pre-allocated Took: 1575.98
Difference: % 60.0384
Intel(R) Core(TM) i7-8700K CPU @ 3.70GHz
Windows 10, x64
Dynamic Took: 151.141
Pre-allocated Took: 81.5159
Difference: % 85.4126
Intel(R) Core(TM) i7-8700K CPU @ 3.70GHz
Windows 10, x32
Dynamic Took: 145.896
Pre-allocated Took: 91.5588
Difference: % 59.3466
Cool. Thanks!
However, it's a little bit weird as consecutive runs can yield very different results, at least on this old dinosaur of a laptop.
Intel(R) Core(TM) i7-4710HQ CPU @ 2.50GHz
Microsoft Windows 8.1 (64-bit)
16 GB RAM
Dynamic Took: 1532.61
Pre-allocated Took: 1328.66
Difference: % 15.3504
Dynamic Took: 1520.74
Pre-allocated Took: 1220.81
Difference: % 24.5686
yeah, but it was 64 bit vs 32 bit.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This snippet code is to show performance difference between pre-allocated arrays vs dynamic.
This is a basic example what's better to use when try to hold a limit order book in high frequency trading systems