Skip to content

Instantly share code, notes, and snippets.

@aperezdc
Last active December 27, 2015 11:39
Show Gist options
  • Save aperezdc/7320320 to your computer and use it in GitHub Desktop.
Save aperezdc/7320320 to your computer and use it in GitHub Desktop.
A simple micro-benchmark to find out which is faster: a virtual method invocation, or a call through a function pointer plus a userdata pointer (the userdata pointer is meant to replace the use of subclasses, and it also takes the place of the “this” pointer that virtual method calls pass implicitly).
/*
* clock.cc
* Copyright (C) 2013 Adrian Perez <[email protected]>
*
* Distributed under terms of the MIT license.
*/
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 199309L
#endif // !_POSIX_C_SOURCE
#include "clock.h"
#include <cstring>
#include <cerrno>
#include <cassert>
#include <cstdio>
#include <cmath>
#include <time.h>
#include <unistd.h>
#define CLOCK_MAX_RESOLUTION_DELTA (10000.0 * 1e-9)
#define CLOCK_MIN_RESOLUTION_DELTA (1e-10)
// Ask the system which resolution it advertises for the per-process
// CPU-time clock (CLOCK_PROCESS_CPUTIME_ID).
//
// Returns the advertised resolution in seconds, or NAN when clock_getres()
// reports an error for that clock.
static double
posixClockTheoricalResolution()
{
    struct timespec advertised;
    const int status = clock_getres(CLOCK_PROCESS_CPUTIME_ID, &advertised);
    if (status == -1) {
        return NAN;
    }

    // Fold the seconds/nanoseconds pair into a single value in seconds.
    const double whole_seconds = static_cast<double>(advertised.tv_sec);
    const double fraction = static_cast<double>(advertised.tv_nsec) * 1e-9;
    return whole_seconds + fraction;
}
// Tell whether CLOCK_PROCESS_CPUTIME_ID can be used on this system.
//
// The clock counts as available when its advertised resolution can be
// queried, i.e. when posixClockTheoricalResolution() does not return NAN.
static bool
posixClockAvailable()
{
    // Call the std:: overload explicitly: <cmath> is only required to
    // declare std::isnan, so an unqualified isnan() may be missing (or
    // ambiguous) depending on the toolchain.
    return !std::isnan(posixClockTheoricalResolution());
}
// Measure the smallest step of CLOCK_PROCESS_CPUTIME_ID that can actually be
// observed: take one reading, then poll the clock until a later reading is
// at least CLOCK_MIN_RESOLUTION_DELTA seconds ahead of it.
//
// Returns the observed difference in seconds. If clock_gettime() fails the
// function asserts in debug builds; when assertions are compiled out
// (NDEBUG) it returns NAN instead of computing with an uninitialised
// timespec. NAN is the same error value posixClockTheoricalResolution() uses.
static inline double
posixClockEmpiricalResolution()
{
    struct timespec tv;

    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tv) == -1) {
        assert(false && "clock_gettime(CLOCK_PROCESS_CPUTIME_ID) failed");
        return NAN;
    }
    const double start = static_cast<double>(tv.tv_sec)
                       + static_cast<double>(tv.tv_nsec) * 1e-9;

    double now;
    do {
        if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tv) == -1) {
            assert(false && "clock_gettime(CLOCK_PROCESS_CPUTIME_ID) failed");
            return NAN;
        }
        now = static_cast<double>(tv.tv_sec)
            + static_cast<double>(tv.tv_nsec) * 1e-9;
    } while ((now - start) < CLOCK_MIN_RESOLUTION_DELTA);

    return now - start;
}
// Decide whether time should be read from CLOCK_PROCESS_CPUTIME_ID.
//
// The decision is computed on the first call and kept in function-local
// statics, so later calls only return the stored answer. The POSIX clock is
// chosen when it is available and its advertised and measured resolutions
// differ by at most CLOCK_MAX_RESOLUTION_DELTA seconds.
bool usePosixClock()
{
    static bool decided = false;
    static bool posix_ok = false;

    if (decided) {
        return posix_ok;
    }

    posix_ok = false;
    if (posixClockAvailable()) {
        const double advertised = posixClockTheoricalResolution();
        const double measured = posixClockEmpiricalResolution();
        const double mismatch = std::fabs(advertised - measured);
        posix_ok = (mismatch <= CLOCK_MAX_RESOLUTION_DELTA);
    }

    decided = true;
    return posix_ok;
}
// Build a clock with both time marks at zero; when autoReset is true the
// starting mark is captured right away through reset().
Clock::Clock(bool autoReset)
    : _start(0.0)
    , _end(0.0)
{
    if (!autoReset) {
        return;
    }
    reset();
}
// Capture the current time as the starting mark (_start), in seconds.
//
// The value comes from CLOCK_PROCESS_CPUTIME_ID when usePosixClock() says
// so, and from clock() otherwise. If clock_gettime() fails the function
// asserts in debug builds; when assertions are compiled out (NDEBUG) it
// stores NAN rather than a value computed from an uninitialised timespec.
void
Clock::reset()
{
    if (!usePosixClock()) {
        _start = static_cast<double>(clock())
               / static_cast<double>(CLOCKS_PER_SEC);
        return;
    }

    struct timespec tv;
    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tv) == -1) {
        assert(false && "clock_gettime(CLOCK_PROCESS_CPUTIME_ID) failed");
        _start = NAN;
        return;
    }
    _start = static_cast<double>(tv.tv_sec)
           + static_cast<double>(tv.tv_nsec) * 1e-9;
}
// Capture the current time as the ending mark (_end) and return elapsed(),
// i.e. the seconds between the starting mark and this sample.
//
// The value comes from CLOCK_PROCESS_CPUTIME_ID when usePosixClock() says
// so, and from clock() otherwise. If clock_gettime() fails the function
// asserts in debug builds; when assertions are compiled out (NDEBUG) it
// stores NAN rather than a value computed from an uninitialised timespec,
// so the returned elapsed time is NAN as well.
double
Clock::sample()
{
    if (usePosixClock()) {
        struct timespec tv;
        if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tv) == -1) {
            assert(false && "clock_gettime(CLOCK_PROCESS_CPUTIME_ID) failed");
            _end = NAN;
        }
        else {
            _end = static_cast<double>(tv.tv_sec)
                 + static_cast<double>(tv.tv_nsec) * 1e-9;
        }
    }
    else {
        _end = static_cast<double>(clock())
             / static_cast<double>(CLOCKS_PER_SEC);
    }
    return elapsed();
}
#ifdef MAIN
// Small self-test driver: reports whether CLOCK_PROCESS_CPUTIME_ID is
// supported, prints its advertised and measured resolution, prints the
// resolution observed through a Clock object, and finally the time spent
// producing that output.
int
main(int argc, char *argv[])
{
    Clock process_time;

    if (!posixClockAvailable()) {
        std::printf("CLOCK_PROCESS_CPUTIME_ID is NOT supported\n");
    }
    else {
        std::printf("CLOCK_PROCESS_CPUTIME_ID is supported\n");
        std::printf("Resolution (advertised/empirical): %.10f/%.10fs\n",
                    posixClockTheoricalResolution(),
                    posixClockEmpiricalResolution());
    }

    // Keep sampling until the clock has visibly advanced.
    Clock resolution;
    double sampled;
    do {
        sampled = resolution.sample();
    } while (sampled < CLOCK_MIN_RESOLUTION_DELTA);

    std::printf("Sampled resolution: %.10fs\n", resolution.elapsed());
    std::printf("Printing the lines above took %.10fs\n", process_time.sample());
    return 0;
}
#endif // MAIN
/*
* clock.h
* Copyright (C) 2013 Adrian Perez <[email protected]>
*
* Distributed under terms of the MIT license.
*/
#ifndef __clock_h__
#define __clock_h__
#include <stdint.h>
// Stopwatch built on two time marks, a start and an end.
//
// All times are doubles expressed in SECONDS (not nanoseconds): the
// implementation computes tv_sec + tv_nsec * 1e-9, or
// clock() / CLOCKS_PER_SEC, for each mark.
class Clock
{
private:
    double _start;  // Mark written by reset(), in seconds
    double _end;    // Mark written by sample(), in seconds

public:
    // Create a clock. When autoReset is true (the default) the starting
    // mark is captured immediately by calling reset().
    Clock(bool autoReset = true);

    // Seconds between the mark taken by the last reset() and the mark taken
    // by the last sample(). Only meaningful once sample() has been called.
    double elapsed() const { return _end - _start; }

    // Take a sample of the current time and return the elapsed time,
    // in seconds.
    double sample();

    // Reset the time counter, i.e. set the "starting time" to now.
    void reset();

    // Helper class that measures a code block through its own lifetime:
    // the constructor calls reset() on the given clock and the destructor
    // calls sample() on it. Use it like this:
    //
    //   Clock clk;
    //   /* ... */
    //
    //   {
    //     Clock::Measure measure(clk);
    //     /* ... code being measured ... */
    //   }
    //
    //   time_elapsed = clk.elapsed();
    //
    // The Measure object keeps a reference to the clock, so the clock must
    // outlive it.
    class Measure {
    private:
        Clock& _clock;

    public:
        Measure(Clock& clock): _clock(clock) { _clock.reset(); }
        ~Measure() { _clock.sample(); }
    };
};
#endif /* !__clock_h__ */
/*
* vcall-perf.cc
* Copyright (C) 2013 Adrian Perez <[email protected]>
*
* Distributed under terms of the MIT license.
*/
#include "clock.h"
#include <cstdio>
// Plain table describing the "function pointer + userdata" way of
// dispatching a call.
struct CallbackTable {
    // Function to invoke; it receives the userdata pointer stored below
    // together with a size argument.
    void* (*CallMe)(void*, size_t);

    // Opaque pointer handed to CallMe as its first argument.
    void* userdata;
};
// Benchmark target for the function-pointer variant. Both arguments are
// ignored and the constant 42 is returned disguised as a pointer.
//
// The function is marked noinline, and contains an empty volatile asm
// statement — presumably so the optimizer cannot drop the call being timed.
__attribute__((noinline))
static void* CbCallMe(void* userdata, size_t size)
{
    (void) userdata;
    (void) size;
    asm volatile ("");
    return reinterpret_cast<void*>(42);
}
class CallbackInvocation {
public:
CallbackInvocation(CallbackTable* cb): cb_(cb) {}
inline void* CallMe(size_t size) {
return (*cb_->CallMe)(cb_->userdata, size);
}
private:
CallbackTable *cb_;
};
// Abstract interface for the virtual-dispatch variant of the benchmark.
class VTableInvocationBase {
public:
    // Call to be timed; implemented by subclasses. Marked noinline.
    __attribute__((noinline))
    virtual void* CallMe(size_t size) = 0;

    // Virtual destructor so objects can be destroyed through this type.
    virtual ~VTableInvocationBase()
    {
    }
};
// Concrete benchmark target for the virtual-dispatch variant. CallMe ignores
// its argument and returns the constant 42 disguised as a pointer, the same
// value the function-pointer variant (CbCallMe) returns.
class VTableInvocation : public VTableInvocationBase {
public:
    // The noinline attribute is written in front of the declaration: GCC
    // does not accept an attribute placed after the declarator of a
    // function definition, and clang only tolerates it with a
    // compatibility warning.
    //
    // The empty volatile asm statement is presumably there so the optimizer
    // cannot drop the call being timed.
    __attribute__((noinline))
    virtual void* CallMe(size_t size)
    {
        (void) size;
        asm volatile ("");
        return (void*) 42;
    }
};
// Benchmark driver. Without command line arguments it times ten million
// calls made through a CallbackTable; with at least one argument it times
// the same number of virtual calls instead. The time taken by the loop is
// then printed using clk.elapsed().
int main(int argc, char **argv)
{
    const unsigned kIterations = 10000000;
    Clock clk;

    if (argc <= 1) {
        CallbackTable callbacks = {
            CbCallMe,
            (void*) 42,
        };
        CallbackInvocation i_cb(&callbacks);
        {
            // Only the loop is timed: the Measure object lives just as long
            // as this scope.
            Clock::Measure measure(clk);
            for (unsigned i = 0; i != kIterations; ++i) {
                i_cb.CallMe(i);
            }
        }
        std::printf("callbacks: %f\n", clk.elapsed());
        return 0;
    }

    VTableInvocation *i_vt = new VTableInvocation();
    {
        // Allocation and deallocation stay outside the timed scope.
        Clock::Measure measure(clk);
        for (unsigned i = 0; i != kIterations; ++i) {
            i_vt->CallMe(i);
        }
    }
    delete i_vt;
    std::printf("vtable: %f\n", clk.elapsed());
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment