Created
July 3, 2022 20:56
-
-
Save jwatte/cafe738fb9d0d45c648d470d629d091b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds MallocBenchmark.cpp in a debug and a release configuration and runs
# both. "test" names an action, not a file, so it is declared phony.
.PHONY: test
test: malloc-release malloc-debug
	./malloc-debug
	./malloc-release

# Unoptimized build with debug info; NDEBUG is not defined here.
malloc-debug: MallocBenchmark.cpp
	clang -O0 -D_DEBUG=1 -g -o malloc-debug MallocBenchmark.cpp

# Optimized build with NDEBUG defined.
malloc-release: MallocBenchmark.cpp
	clang -O3 -DNDEBUG=1 -o malloc-release MallocBenchmark.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Simple malloc timing program -- how much overhead is there in malloc/free? | |
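* In the runs recorded below, MSVC on Windows came out roughly 6x slower in
* debug and 3.5x slower in release than clang on Linux (note that the two
* platforms were measured on different machines). This is when doing matched
* mallocs and frees.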
*/ | |
// AMD Ryzen Threadripper 1950x at 3.6 GHz | |
// Windows 10 Pro | |
// Visual Studio 2019, x64, debug: | |
// WIN32 DEBUG: 100000000 iterations, 39.072507 seconds: 390.725 ns/iter | |
// WIN32 RELEASE: 100000000 iterations, 19.773716 seconds: 197.737 ns/iter | |
// | |
// model name : Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz | |
// cpu MHz : 3501.426 | |
// Linux ip-172-31-31-60 5.13.0-1031-aws #35~20.04.1-Ubuntu SMP Mon Jun 13 22:30:30 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux | |
// Ubuntu clang version 13.0.1 | |
// | |
// POSIX DEBUG: 100000000 iterations, 6.537821 seconds: 65.378 ns/iter | |
// POSIX RELEASE: 100000000 iterations, 5.667803 seconds: 56.678 ns/iter | |
#if defined(_WIN32) | |
#include <Windows.h> | |
#else | |
#include <unistd.h> | |
#include <time.h> | |
#endif | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdint.h> | |
#if defined(_WIN32) | |
// Seconds per performance-counter tick. Stays 0.0 until init_clock() fills it in.
double clockMultiplier = 0.0;
// Platform tag printed at the start of the result line.
char const platform[] = "WIN32";
// Queries the performance-counter frequency and stores its reciprocal in
// clockMultiplier, so read_clock() can convert raw ticks to seconds.
// Call once before the first read_clock(); until then clockMultiplier is 0.0.
void init_clock() {
    // Use a real LARGE_INTEGER rather than casting an int64_t pointer to it.
    LARGE_INTEGER freq;
    freq.QuadPart = 0;
    QueryPerformanceFrequency(&freq);
    clockMultiplier = 1.0 / (double)freq.QuadPart;
}
double read_clock() { | |
int64_t ctr = 0; | |
QueryPerformanceCounter((LARGE_INTEGER*)&ctr); | |
return double(ctr) * clockMultiplier; | |
} | |
#else | |
// Platform tag printed at the start of the result line.
char const platform[] = "POSIX";
// Intentionally empty: this branch's read_clock() needs no setup. The
// function exists so main() can call init_clock() on every platform.
void init_clock() {
}
// Returns the CLOCK_MONOTONIC time as seconds in a double: whole seconds plus
// the nanosecond field scaled by 1e-9.
double read_clock() {
    // Zero-initialized so that a failed clock_gettime() yields 0.0 rather than
    // a read of indeterminate memory.
    struct timespec ts = {0, 0};
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (double)ts.tv_sec + 1e-9 * (double)ts.tv_nsec;
}
#endif | |
// Number of allocations (and matching frees) the benchmark performs.
constexpr int numIterations = 100 * 1000 * 1000;
// One slot per allocation, filled and released in index order by main().
// With 8-byte pointers this is 800 MB of static storage.
void* pointers[numIterations];
// Build-configuration tag printed in the result line; selected by whether
// NDEBUG is defined at compile time. Const, like the platform tag, because it
// is only ever read.
#if defined(NDEBUG)
char const config[] = "RELEASE";
#else
char const config[] = "DEBUG";
#endif
int main() | |
{ | |
init_clock(); | |
// Simple linear congruential random number generator, to make it exactly | |
// the same across platforms. The quality of this RNG doesn't matter for | |
// what we're measuring. | |
uint32_t rng = 15485863; | |
int allocFront = 0; | |
int freeFront = 0; | |
double startTime = read_clock(); | |
while (freeFront < numIterations) { | |
// some prime factors go into this RNG -- again, quality is good enough | |
rng = rng * 40003409 + 100003313; | |
// before we get to end, allocate 3x more than we deallocate | |
if (rng & 0xc000) { | |
if (allocFront < numIterations) { | |
// use a few different block sizes, each aligned to 32 bytes | |
pointers[allocFront] = malloc(32UL + ((rng & 0x600) >> 4)); | |
allocFront++; | |
} | |
} | |
else if (freeFront < allocFront) { | |
free(pointers[freeFront]); | |
freeFront++; | |
} | |
} | |
double endTime = read_clock(); | |
printf("%s %s: %d iterations, %f seconds: %.3f ns/iter\n", platform, config, numIterations, endTime - startTime, 1e9 * (endTime - startTime) / double(numIterations)); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment