Last active
June 24, 2018 21:30
-
-
Save kayru/7b2f8710ee5b73c8cf019195f45a982f to your computer and use it in GitHub Desktop.
CPU cache performance testbed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// clang++ -o cachebench -O2 -std=c++14 cachebench.cpp | |
#include <stdio.h> | |
#include <chrono> | |
#include <vector> | |
#include <algorithm> | |
#include <random> | |
#include <float.h> | |
#include <stdint.h> | |
#include <cstring> | |
#include <memory.h> | |
#include <stdlib.h> | |
using TimePoint = std::chrono::high_resolution_clock::time_point; | |
using u64 = unsigned long long; | |
using u32 = unsigned int; | |
inline auto timeDiff(TimePoint t1, TimePoint t2) | |
{ | |
using namespace std::chrono; | |
return duration_cast<duration<double>>(t2 - t1).count(); | |
} | |
inline auto getTimestamp() | |
{ | |
using namespace std::chrono; | |
return high_resolution_clock::now(); | |
} | |
inline auto getCycleCount() | |
{ | |
#if TARGET_OS_IPHONE | |
return 0; // TODO | |
#else | |
return __builtin_readcyclecounter(); | |
#endif | |
} | |
struct Element | |
{ | |
u64 next; | |
u64 value; | |
u64 padding[6]; | |
}; | |
static_assert(sizeof(Element) == 64, "Element must occupy one cache line"); | |
double humanSizeValue(double x) | |
{ | |
if (x < (1<<10)) return x; | |
if (x < (1<<20)) return x / (1<<10); | |
if (x < (1<<30)) return x / (1<<20); | |
else return x / (1<<30); | |
} | |
const char* humanSizeUnit(double x) | |
{ | |
if (x < (1<<10)) return "B"; | |
if (x < (1<<20)) return "KiB"; | |
if (x < (1<<30)) return "MiB"; | |
else return "GiB"; | |
} | |
int runBenchmark(int bufferSizeLog2) | |
{ | |
auto randomEngine = std::default_random_engine(54321); | |
const u64 elemSize = sizeof(Element); | |
const u64 elemCount = ((1ull << bufferSizeLog2) + elemSize - 1) / elemSize; | |
const u64 bufferSize = elemSize * elemCount; | |
printf("Data size: %g %s (%llu cache lines) - ", | |
humanSizeValue(bufferSize), | |
humanSizeUnit(bufferSize), | |
elemCount); | |
Element* elements = (Element*)malloc(bufferSize); | |
std::memset(elements, 0, bufferSize); | |
for (u64 i=0; i<elemCount; ++i) | |
{ | |
elements[i].value = i; | |
} | |
std::shuffle(elements, elements+elemCount, randomEngine); | |
for (u64 i=0; i<elemCount; ++i) | |
{ | |
u64 cursor = elements[i].value; | |
elements[cursor].next = elements[(i+1)%elemCount].value; | |
} | |
double totalTime = 0.0; | |
u64 accum = 0; | |
const u64 elementsToVisit = 32 * 1024 * 1024; | |
TimePoint t1 = getTimestamp(); | |
auto c1 = getCycleCount(); | |
u64 cursor = 0; | |
for(u64 i=0; i<elementsToVisit; ++i) | |
{ | |
accum += elements[cursor].value; | |
cursor = elements[cursor].next; | |
} | |
TimePoint t2 = getTimestamp(); | |
auto c2 = getCycleCount(); | |
totalTime += timeDiff(t1, t2); | |
const u64 totalCycles = c2-c1; | |
printf("Total time: %f sec - ", totalTime); | |
printf("Time/elem: %f ns - ", ((1e9*totalTime) / elementsToVisit)); | |
if (totalCycles) | |
{ | |
printf("Cycles/elem: %f", (double(totalCycles) / elementsToVisit)); | |
} | |
printf("\n"); | |
free(elements); | |
return accum != 0; | |
} | |
int main(int argc, char** argv) | |
{ | |
int bufferSizeLog2 = 0; | |
if (argc > 1) | |
{ | |
bufferSizeLog2 = atoi(argv[1]); | |
} | |
if (bufferSizeLog2 == 0) | |
{ | |
for (int i=10; i<28; ++i) | |
{ | |
printf("sizeLog2: %d - ", i); | |
runBenchmark(i); | |
} | |
} | |
else | |
{ | |
runBenchmark(bufferSizeLog2); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment