Created
January 9, 2022 12:32
-
-
Save BigRedEye/582d25ad01c634347c0aa31c13369aa1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <linux/perf_event.h> | |
#include <linux/hw_breakpoint.h> | |
#include <sys/syscall.h> | |
#include <sys/ioctl.h> | |
#include <unistd.h> | |
#include <stdexcept> | |
#include <optional> | |
namespace perf {

using int64 = long long;

/// Plain aggregate with one 64-bit slot per metric; every slot starts at zero.
struct PerfMetrics
{
    int64 CpuInstructions = 0;
    int64 CacheMisses = 0;
    int64 TimeNs = 0;
};

/// Owns a single file descriptor obtained from the perf_event_open syscall.
///
/// The type is move-only: the destructor closes the descriptor, so an
/// implicit copy would end with the same descriptor being closed twice.
class PerfEvent {
public:
    /// Opens a counter for the given perf `type` / `config` pair.
    /// The counter is created disabled; call Start() to begin counting.
    /// @throws std::runtime_error when the syscall returns -1.
    explicit PerfEvent(__u32 type, __u32 config) {
        struct perf_event_attr attr{};
        attr.size = sizeof(attr);
        attr.type = type;
        attr.config = config;
        attr.disabled = 1;
        // Arguments after &attr, per perf_event_open(2):
        // pid = 0, cpu = -1, group_fd = -1, flags = 0.
        // syscall() returns long; narrow explicitly to the descriptor type.
        fd_ = static_cast<int>(syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0));
        if (fd_ == -1) {
            throw std::runtime_error{"Failed to open perf event"};
        }
    }

    PerfEvent(const PerfEvent&) = delete;
    PerfEvent& operator=(const PerfEvent&) = delete;

    /// Takes over the descriptor; `other` is left holding -1.
    PerfEvent(PerfEvent&& other) noexcept : fd_(other.fd_) {
        other.fd_ = -1;
    }

    /// Closes the currently held descriptor (if any), then takes over
    /// the one held by `other`.
    PerfEvent& operator=(PerfEvent&& other) noexcept {
        if (this != &other) {
            Close();
            fd_ = other.fd_;
            other.fd_ = -1;
        }
        return *this;
    }

    ~PerfEvent() {
        Close();
    }

    /// Resets the counter value and enables counting.
    /// ioctl results are not inspected.
    void Start() {
        // The perf ioctls read a flags word from the third argument;
        // pass 0 explicitly instead of leaving it unspecified.
        ioctl(fd_, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd_, PERF_EVENT_IOC_ENABLE, 0);
    }

    /// Disables counting. The ioctl result is not inspected.
    void Stop() {
        ioctl(fd_, PERF_EVENT_IOC_DISABLE, 0);
    }

    /// Reads the current counter value.
    /// @return the value, or std::nullopt when read() does not deliver
    ///         exactly sizeof(int64) bytes.
    std::optional<int64> Read() {
        int64 res = 0;
        // Compare in the signed domain so an error result of -1 is not
        // silently converted to a huge unsigned value.
        const ssize_t got = ::read(fd_, &res, sizeof(res));
        if (got != static_cast<ssize_t>(sizeof(res))) {
            return std::nullopt;
        }
        return res;
    }

public:
    /// Counter for PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS.
    static PerfEvent CpuInstructions() {
        return PerfEvent{PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS};
    }

    /// Counter for PERF_TYPE_HARDWARE / PERF_COUNT_HW_CACHE_MISSES.
    static PerfEvent CacheMisses() {
        return PerfEvent{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES};
    }

    /// Counter for PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK.
    static PerfEvent TaskClock() {
        return PerfEvent{PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK};
    }

private:
    /// Closes the descriptor when one is held and marks the object empty.
    void Close() noexcept {
        if (fd_ != -1) {
            ::close(fd_);
            fd_ = -1;
        }
    }

    int fd_ = -1;
};

} // namespace perf
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <new>
#include <unordered_map>
#include <vector>

#include <fmt/format.h>
// The best allocator ever | |
char* init = (char*)malloc(8ull * 1024 * 1024 * 1024); | |
char* buf = init; | |
void* operator new(size_t count) { | |
char* ptr = buf; | |
while (count % 16) { | |
++count; | |
} | |
buf += count; | |
return ptr; | |
} | |
void operator delete(void* ptr, size_t count) {} | |
void reset_mem() { | |
buf = init; | |
} | |
// Loop trip count shared by the benchmark code in this file.
constexpr int ITERATIONS = 100'000'000;

// Clock used for wall-time measurements.
using Clock = std::chrono::steady_clock;

// Converts a Clock duration into seconds expressed as a double.
double Seconds(Clock::duration delta) {
    using FractionalSeconds = std::chrono::duration<double>;
    const FractionalSeconds elapsed = std::chrono::duration_cast<FractionalSeconds>(delta);
    return elapsed.count();
}
// Stores -i into map[i] for every i in [0, ITERATIONS).
// Marked noinline so the loop stays a separate function.
// Throws std::length_error when `map` holds fewer than ITERATIONS elements,
// instead of writing past the end of the vector.
__attribute__((noinline)) void fill_map(std::vector<int>& map) {
    if (map.size() < static_cast<std::size_t>(ITERATIONS)) {
        throw std::length_error{"fill_map: vector holds fewer than ITERATIONS elements"};
    }
    for (int i = 0; i < ITERATIONS; ++i) {
        map[i] = -i;
    }
}
template <template <class ...> class Map> | |
__attribute__((noinline)) void run_map_test() { | |
perf::PerfEvent event = perf::PerfEvent::CpuInstructions(); | |
event.Start(); | |
std::vector<int> map; | |
map.resize(ITERATIONS); | |
fill_map(map); | |
auto delta = *event.Read(); | |
fmt::print("{} instructions per iteration\n", static_cast<double>(delta) / ITERATIONS); | |
} | |
int main() { | |
for (int i = 0; i < 10; ++i) { | |
auto start = Clock::now(); | |
reset_mem(); | |
run_map_test<std::unordered_map>(); | |
auto end = Clock::now(); | |
fmt::print("It took {} secs for {} iterations\n", Seconds(end - start), ITERATIONS); | |
} | |
} | |
/* Possible output: | |
4.84316289 instructions per iteration | |
It took 0.242586475 secs for 100000000 iterations | |
2.00129663 instructions per iteration | |
It took 0.068408634 secs for 100000000 iterations | |
2.00115756 instructions per iteration | |
It took 0.068283233 secs for 100000000 iterations | |
2.0010276 instructions per iteration | |
It took 0.068269616 secs for 100000000 iterations | |
2.00129688 instructions per iteration | |
It took 0.06803983 secs for 100000000 iterations | |
2.00105244 instructions per iteration | |
It took 0.065850256 secs for 100000000 iterations | |
2.00100018 instructions per iteration | |
It took 0.06551507 secs for 100000000 iterations | |
2.00124106 instructions per iteration | |
It took 0.065625378 secs for 100000000 iterations | |
2.00103205 instructions per iteration | |
It took 0.065819786 secs for 100000000 iterations | |
2.00099453 instructions per iteration | |
It took 0.06574377 secs for 100000000 iterations | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment