Skip to content

Instantly share code, notes, and snippets.

@BigRedEye
Created January 9, 2022 12:31
Show Gist options
  • Save BigRedEye/b97b0ca43a60c74776f5fe14052d2305 to your computer and use it in GitHub Desktop.
Save BigRedEye/b97b0ca43a60c74776f5fe14052d2305 to your computer and use it in GitHub Desktop.
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <cstdlib>
#include <new>
#include <optional>
#include <stdexcept>
namespace perf {

using int64 = long long;

// Plain aggregate with one slot per counter kind; every field starts at zero.
// NOTE(review): nothing in this file fills it in.
struct PerfMetrics
{
    int64 CpuInstructions = 0;
    int64 CacheMisses = 0;
    int64 TimeNs = 0;
};

// Owns one perf_event_open(2) file descriptor.
//
// The counter is created disabled; call Start() to zero and enable it,
// Stop() to disable it, and Read() to fetch the current value.
//
// The descriptor is closed in the destructor, so the type is move-only:
// an implicit copy would leave two objects closing the same descriptor.
class PerfEvent {
public:
    // Opens a counter of the given perf `type` / `config`.
    // Throws std::runtime_error when the syscall reports failure (-1).
    explicit PerfEvent(__u32 type, __u32 config) {
        struct perf_event_attr attr{};
        attr.size = sizeof(attr);
        attr.type = type;
        attr.config = config;
        attr.disabled = 1;

        // Arguments after `attr`: pid = 0, cpu = -1, group_fd = -1, flags = 0.
        // syscall() returns long; the result is either -1 or a descriptor,
        // both of which fit in int.
        fd_ = static_cast<int>(syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0));
        if (fd_ == -1) {
            throw std::runtime_error{"Failed to open perf event"};
        }
    }

    PerfEvent(const PerfEvent&) = delete;
    PerfEvent& operator=(const PerfEvent&) = delete;

    // Takes over `other`'s descriptor and leaves `other` empty (fd -1).
    PerfEvent(PerfEvent&& other) noexcept
        : fd_(other.fd_)
    {
        other.fd_ = -1;
    }

    // Closes the currently held descriptor (if any), then takes over `other`'s.
    PerfEvent& operator=(PerfEvent&& other) noexcept {
        if (this != &other) {
            if (fd_ != -1) {
                ::close(fd_);
            }
            fd_ = other.fd_;
            other.fd_ = -1;
        }
        return *this;
    }

    ~PerfEvent() {
        // A moved-from object holds -1 and has nothing to close.
        if (fd_ != -1) {
            ::close(fd_);
        }
    }

    // Resets the counter and enables it. The ioctl results are not checked.
    void Start() {
        ioctl(fd_, PERF_EVENT_IOC_RESET);
        ioctl(fd_, PERF_EVENT_IOC_ENABLE);
    }

    // Disables the counter. The ioctl result is not checked.
    void Stop() {
        ioctl(fd_, PERF_EVENT_IOC_DISABLE);
    }

    // Reads the counter value. Returns std::nullopt unless read(2) delivers
    // exactly sizeof(int64) bytes.
    std::optional<int64> Read() {
        int64 res = 0;
        const ssize_t got = ::read(fd_, &res, sizeof(res));
        if (got != static_cast<ssize_t>(sizeof(res))) {
            return std::nullopt;
        }
        return res;
    }

    // Convenience factories for the counters used by this file.
    static PerfEvent CpuInstructions() {
        return PerfEvent{PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS};
    }
    static PerfEvent CacheMisses() {
        return PerfEvent{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES};
    }
    static PerfEvent TaskClock() {
        return PerfEvent{PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK};
    }

private:
    int fd_ = -1;
};

} // namespace perf
#include <fmt/format.h>
#include <chrono>
#include <cstddef>
#include <unordered_map>
#include <vector>
// The best allocator ever
char* init = (char*)malloc(8ull * 1024 * 1024 * 1024);
char* buf = init;
void* operator new(size_t count) {
char* ptr = buf;
while (count % 16) {
++count;
}
buf += count;
return ptr;
}
void operator delete(void* ptr, size_t count) {}
void reset_mem() {
buf = init;
}
// Number of elements written by each benchmark run.
constexpr int ITERATIONS = 100'000'000;

// Monotonic clock used for wall-time measurements.
using Clock = std::chrono::steady_clock;

// Expresses a Clock interval as a floating-point number of seconds.
double Seconds(Clock::duration delta) {
    const std::chrono::duration<double> as_seconds = delta;
    return as_seconds.count();
}
// Writes -i into map[i] for every i in [0, ITERATIONS).
//
// Kept out of line (noinline) so the loop stays a separate function.
// Throws std::out_of_range when `map` holds fewer than ITERATIONS elements,
// since the loop would otherwise write past the end of the vector.
__attribute__((noinline)) void fill_map(std::vector<int>& map) {
    if (map.size() < static_cast<std::size_t>(ITERATIONS)) {
        throw std::out_of_range{"fill_map: vector holds fewer than ITERATIONS elements"};
    }
    for (int i = 0; i < ITERATIONS; ++i) {
        map[i] = -i;
    }
}
// Runs one benchmark pass and prints the instructions retired per element.
//
// The measured region covers constructing a std::vector<int>, resizing it to
// ITERATIONS elements and filling it via fill_map().
// The `Map` template parameter is not used by the body.
// Throws std::runtime_error if the counter cannot be opened or read.
template <template <class ...> class Map>
__attribute__((noinline)) void run_map_test() {
    perf::PerfEvent event = perf::PerfEvent::CpuInstructions();
    event.Start();

    std::vector<int> map;
    map.resize(ITERATIONS);
    fill_map(map);

    // Freeze the counter so the value read below is a fixed snapshot.
    event.Stop();

    // Read() yields nullopt on a failed or short read; dereferencing that
    // would be undefined behaviour, so fail loudly instead.
    const auto delta = event.Read();
    if (!delta) {
        throw std::runtime_error{"Failed to read perf event"};
    }
    fmt::print("{} instructions per iteration\n", static_cast<double>(*delta) / ITERATIONS);
}
// Runs the benchmark ten times, rewinding the arena before each pass and
// printing the wall time of every pass.
int main() {
    constexpr int kRuns = 10;
    for (int run = 0; run != kRuns; ++run) {
        const auto began = Clock::now();
        reset_mem();
        run_map_test<std::unordered_map>();
        const auto finished = Clock::now();

        const double elapsed = Seconds(finished - began);
        fmt::print("It took {} secs for {} iterations\n", elapsed, ITERATIONS);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment