Created
January 9, 2022 12:31
-
-
Save BigRedEye/b97b0ca43a60c74776f5fe14052d2305 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <cstdlib>
#include <new>
#include <optional>
#include <stdexcept>
namespace perf {

/// Signed integer type used for every counter value in this namespace.
using int64 = long long;

/// Plain aggregate of measurement results; every field starts at zero.
struct PerfMetrics {
    int64 CpuInstructions = 0;
    int64 CacheMisses = 0;
    int64 TimeNs = 0;
};

/// Move-only owner of a single perf_event_open(2) file descriptor.
///
/// The counter is created disabled; call Start() to reset and enable it,
/// Stop() to disable it and Read() to fetch its current value. The descriptor
/// is closed exactly once, by whichever object holds it last.
class PerfEvent {
public:
    /// Opens a counter of the given perf `type` / `config` pair.
    /// @throws std::runtime_error if the perf_event_open syscall fails.
    explicit PerfEvent(__u32 type, __u32 config) {
        struct perf_event_attr attr{};
        attr.size = sizeof(attr);
        attr.type = type;
        attr.config = config;
        attr.disabled = 1;

        // Syscall arguments after &attr: pid = 0, cpu = -1, group_fd = -1, flags = 0.
        // syscall() returns long; a file descriptor always fits in an int.
        fd_ = static_cast<int>(syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0));
        if (fd_ == -1) {
            throw std::runtime_error{"Failed to open perf event"};
        }
    }

    // Copying would make two objects close the same descriptor.
    PerfEvent(const PerfEvent&) = delete;
    PerfEvent& operator=(const PerfEvent&) = delete;

    /// Takes over the descriptor of `other`, leaving `other` empty (fd -1).
    PerfEvent(PerfEvent&& other) noexcept : fd_{other.fd_} {
        other.fd_ = -1;
    }

    /// Closes the currently held descriptor (if any) and takes over the one of `other`.
    PerfEvent& operator=(PerfEvent&& other) noexcept {
        if (this != &other) {
            Close();
            fd_ = other.fd_;
            other.fd_ = -1;
        }
        return *this;
    }

    ~PerfEvent() {
        Close();
    }

    /// Resets the counter value and enables counting.
    /// @throws std::runtime_error if either ioctl fails.
    void Start() {
        Control(PERF_EVENT_IOC_RESET, "Failed to reset perf event");
        Control(PERF_EVENT_IOC_ENABLE, "Failed to enable perf event");
    }

    /// Disables counting; the accumulated value stays readable.
    /// @throws std::runtime_error if the ioctl fails.
    void Stop() {
        Control(PERF_EVENT_IOC_DISABLE, "Failed to disable perf event");
    }

    /// Reads the current counter value.
    /// @return the value, or std::nullopt when read(2) does not deliver a full 8-byte value.
    std::optional<int64> Read() const {
        int64 res = 0;
        const auto got = ::read(fd_, &res, sizeof(res));
        if (got != static_cast<decltype(got)>(sizeof(res))) {
            return std::nullopt;
        }
        return res;
    }

public:
    /// Counter configured as PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS.
    static PerfEvent CpuInstructions() {
        return PerfEvent{PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS};
    }

    /// Counter configured as PERF_TYPE_HARDWARE / PERF_COUNT_HW_CACHE_MISSES.
    static PerfEvent CacheMisses() {
        return PerfEvent{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES};
    }

    /// Counter configured as PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK.
    static PerfEvent TaskClock() {
        return PerfEvent{PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK};
    }

private:
    /// Issues one perf ioctl with an explicit zero argument.
    /// ioctl() is variadic and the perf requests interpret the third argument
    /// as a flags word, so leaving it out passes an indeterminate value.
    void Control(unsigned long request, const char* error) {
        if (ioctl(fd_, request, 0) == -1) {
            throw std::runtime_error{error};
        }
    }

    /// Closes the descriptor if one is held and marks the object empty.
    void Close() noexcept {
        if (fd_ != -1) {
            ::close(fd_);
            fd_ = -1;
        }
    }

    int fd_ = -1;
};

}  // namespace perf
#include <fmt/format.h> | |
#include <chrono> | |
#include <cstddef> | |
#include <unordered_map> | |
#include <vector> | |
// The best allocator ever | |
char* init = (char*)malloc(8ull * 1024 * 1024 * 1024); | |
char* buf = init; | |
void* operator new(size_t count) { | |
char* ptr = buf; | |
while (count % 16) { | |
++count; | |
} | |
buf += count; | |
return ptr; | |
} | |
void operator delete(void* ptr, size_t count) {} | |
void reset_mem() { | |
buf = init; | |
} | |
// Element count shared by the benchmark code in this file.
constexpr int ITERATIONS = 100'000'000;

// Clock used for the wall-time measurements.
using Clock = std::chrono::steady_clock;

// Converts a Clock interval into a floating-point number of seconds.
double Seconds(Clock::duration delta) {
    using FractionalSeconds = std::chrono::duration<double>;
    const auto as_seconds = std::chrono::duration_cast<FractionalSeconds>(delta);
    return as_seconds.count();
}
// Writes -i into map[i] for every i in [0, min(map.size(), ITERATIONS)).
// The bound is clamped to the vector's size so a vector shorter than
// ITERATIONS is filled completely instead of being written past its end.
// Kept out of line (noinline) so the loop stays a separate function.
__attribute__((noinline)) void fill_map(std::vector<int>& map) {
    const int limit = map.size() < static_cast<std::size_t>(ITERATIONS)
                          ? static_cast<int>(map.size())
                          : ITERATIONS;
    for (int i = 0; i < limit; ++i) {
        map[i] = -i;
    }
}
template <template <class ...> class Map> | |
__attribute__((noinline)) void run_map_test() { | |
perf::PerfEvent event = perf::PerfEvent::CpuInstructions(); | |
event.Start(); | |
std::vector<int> map; | |
map.resize(ITERATIONS); | |
fill_map(map); | |
auto delta = *event.Read(); | |
fmt::print("{} instructions per iteration\n", static_cast<double>(delta) / ITERATIONS); | |
} | |
int main() { | |
for (int i = 0; i < 10; ++i) { | |
auto start = Clock::now(); | |
reset_mem(); | |
run_map_test<std::unordered_map>(); | |
auto end = Clock::now(); | |
fmt::print("It took {} secs for {} iterations\n", Seconds(end - start), ITERATIONS); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment