Skip to content

Instantly share code, notes, and snippets.

@kayru
Last active June 24, 2018 21:30
Show Gist options
  • Save kayru/7b2f8710ee5b73c8cf019195f45a982f to your computer and use it in GitHub Desktop.
Save kayru/7b2f8710ee5b73c8cf019195f45a982f to your computer and use it in GitHub Desktop.
CPU cache performance testbed
// clang++ -o cachebench -O2 -std=c++14 cachebench.cpp
#include <stdio.h>
#include <chrono>
#include <vector>
#include <algorithm>
#include <random>
#include <float.h>
#include <stdint.h>
#include <cstring>
#include <memory.h>
#include <stdlib.h>
using TimePoint = std::chrono::high_resolution_clock::time_point;
using u64 = unsigned long long;
using u32 = unsigned int;
inline auto timeDiff(TimePoint t1, TimePoint t2)
{
using namespace std::chrono;
return duration_cast<duration<double>>(t2 - t1).count();
}
inline auto getTimestamp()
{
using namespace std::chrono;
return high_resolution_clock::now();
}
inline auto getCycleCount()
{
#if TARGET_OS_IPHONE
return 0; // TODO
#else
return __builtin_readcyclecounter();
#endif
}
struct Element
{
u64 next;
u64 value;
u64 padding[6];
};
static_assert(sizeof(Element) == 64, "Element must occupy one cache line");
double humanSizeValue(double x)
{
if (x < (1<<10)) return x;
if (x < (1<<20)) return x / (1<<10);
if (x < (1<<30)) return x / (1<<20);
else return x / (1<<30);
}
const char* humanSizeUnit(double x)
{
if (x < (1<<10)) return "B";
if (x < (1<<20)) return "KiB";
if (x < (1<<30)) return "MiB";
else return "GiB";
}
int runBenchmark(int bufferSizeLog2)
{
auto randomEngine = std::default_random_engine(54321);
const u64 elemSize = sizeof(Element);
const u64 elemCount = ((1ull << bufferSizeLog2) + elemSize - 1) / elemSize;
const u64 bufferSize = elemSize * elemCount;
printf("Data size: %g %s (%llu cache lines) - ",
humanSizeValue(bufferSize),
humanSizeUnit(bufferSize),
elemCount);
Element* elements = (Element*)malloc(bufferSize);
std::memset(elements, 0, bufferSize);
for (u64 i=0; i<elemCount; ++i)
{
elements[i].value = i;
}
std::shuffle(elements, elements+elemCount, randomEngine);
for (u64 i=0; i<elemCount; ++i)
{
u64 cursor = elements[i].value;
elements[cursor].next = elements[(i+1)%elemCount].value;
}
double totalTime = 0.0;
u64 accum = 0;
const u64 elementsToVisit = 32 * 1024 * 1024;
TimePoint t1 = getTimestamp();
auto c1 = getCycleCount();
u64 cursor = 0;
for(u64 i=0; i<elementsToVisit; ++i)
{
accum += elements[cursor].value;
cursor = elements[cursor].next;
}
TimePoint t2 = getTimestamp();
auto c2 = getCycleCount();
totalTime += timeDiff(t1, t2);
const u64 totalCycles = c2-c1;
printf("Total time: %f sec - ", totalTime);
printf("Time/elem: %f ns - ", ((1e9*totalTime) / elementsToVisit));
if (totalCycles)
{
printf("Cycles/elem: %f", (double(totalCycles) / elementsToVisit));
}
printf("\n");
free(elements);
return accum != 0;
}
int main(int argc, char** argv)
{
int bufferSizeLog2 = 0;
if (argc > 1)
{
bufferSizeLog2 = atoi(argv[1]);
}
if (bufferSizeLog2 == 0)
{
for (int i=10; i<28; ++i)
{
printf("sizeLog2: %d - ", i);
runBenchmark(i);
}
}
else
{
runBenchmark(bufferSizeLog2);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment