Skip to content

Instantly share code, notes, and snippets.

@chandlerc
Forked from thoughtpolice/bench.h
Created July 22, 2017 04:13
Show Gist options
  • Save chandlerc/0ac03383a56717204806185a580c82e4 to your computer and use it in GitHub Desktop.
Save chandlerc/0ac03383a56717204806185a580c82e4 to your computer and use it in GitHub Desktop.
Intel ERMSB benchmarking
/* bench.h - benchmark harness
Written in 2014 by Austin Seipp <[email protected]>
To the extent possible under law, the author(s) have dedicated all
copyright and related and neighboring rights to this software to
the public domain worldwide. This software is distributed without
any warranty.
You should have received a copy of the CC0 Public Domain Dedication
along with this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef __BENCH_H__
#define __BENCH_H__
#if defined(USE_LINUX_PERF) && defined(__linux__)
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
/* File descriptor of the perf cycle counter; stays -1 until opened. */
static int ticks_fddev = -1;

/*
 * Return the current value of a hardware CPU-cycle counter obtained via
 * the Linux perf_event_open(2) interface.
 *
 * The counter is opened on first use and the descriptor is cached in
 * ticks_fddev.  If opening fails the descriptor stays -1, the read below
 * fails, and the open is attempted again on the next call.
 *
 * Returns 0 when the counter could not be read in full.
 *
 * Declared static inline (like the non-perf fallback) because this
 * definition lives in a header: an external definition would collide at
 * link time if the header were included from more than one source file.
 */
static inline uint64_t
get_ticks(void)
{
    uint64_t result;

    if (ticks_fddev == -1) {
        /* Static storage: every field not set below is zero. */
        static struct perf_event_attr attr;

        attr.type = PERF_TYPE_HARDWARE;
        attr.size = sizeof attr;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        /* Arguments after &attr: pid, cpu, group_fd, flags. */
        ticks_fddev = (int)syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

    if (read(ticks_fddev, &result, sizeof result) != (ssize_t)sizeof result)
        return 0;
    return result;
}
#else
/*
 * Return the current value of the CPU's tick counter as a 64-bit number.
 *
 *  - CompCert:        the compiler's __builtin_rdtsc().
 *  - i386 / amd64:    the RDTSC instruction (EDX:EAX combined).
 *  - PowerPC:         the time base, read as upper and lower 32-bit halves.
 *
 * Any other target is rejected at compile time.
 */
static inline uint64_t
get_ticks(void)
{
#if defined(__COMPCERT__)
    return __builtin_rdtsc();
#elif defined(__i386__) || defined(__amd64__)
    uint32_t lo = 0, hi = 0;
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return ((uint64_t)lo | ((uint64_t)hi << 32));
#elif defined(__powerpc__)
    uint32_t lo, hi, hi_check;
    /*
     * The two halves are read by separate instructions, so the lower half
     * can carry into the upper half between them.  Re-read the upper half
     * and retry until it is unchanged, so that hi and lo belong together.
     */
    do {
        __asm__ __volatile__("mftbu %0; mftb %1; mftbu %2"
                             : "=r" (hi), "=r" (lo), "=r" (hi_check));
    } while (hi != hi_check);
    return ((uint64_t)lo | ((uint64_t)hi << 32));
#else
#error need a get_ticks() function
#endif
}
#endif
/*
 * timeit(x, minvar): execute `x` once, measure how many ticks it took with
 * get_ticks(), and lower `minvar` to that figure if it is smaller.
 *
 * The caller must have an assignable variable named `ticks` in scope; it is
 * overwritten with the elapsed tick count of this run.
 *
 * Wrapped in do { } while (0) so that `timeit(...);` is a single statement
 * and can be used as the unbraced body of an if/else.
 */
#define timeit(x,minvar) do { \
    ticks = get_ticks(); \
    x; \
    ticks = get_ticks() - ticks; \
    if (ticks < (minvar)) \
        (minvar) = ticks; \
} while (0)
#define maxticks 0xffffffffffffffffull
#include "osfreq.c"
#endif /* __BENCH_H__ */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "bench.h"
#define NL "\n\t"
#define ALIGN64 __attribute__((__aligned__(64)))
/*
 * Copy `sz` bytes from `src` to `dst` with a single `rep movsb`
 * (x86-64 only).  The regions are expected not to overlap, as with memcpy.
 *
 * The operands are bound directly to the registers the instruction uses:
 * RDI = destination, RSI = source, RCX = byte count.  They are read-write
 * ("+") because the instruction advances both pointers and counts RCX down
 * to zero.  The "memory" clobber tells the compiler that memory is read and
 * written behind its back, so it must not cache or reorder accesses to the
 * buffers across the copy.
 */
static inline __attribute__((always_inline))
void
ermsb_memcpy(void* dst, const void* src, size_t sz)
{
    __asm__ volatile("rep movsb"
                     : "+D" (dst), "+S" (src), "+c" (sz)
                     : /* no input-only operands */
                     : "memory");
}
#ifdef ERMSB
#define BENCH_MEMCPY ermsb_memcpy
#else
#define BENCH_MEMCPY memcpy
#endif
/*
 * Parse a non-negative decimal count from a command-line argument.
 *
 * Returns the parsed value, or `fallback` (after printing a diagnostic to
 * stderr) when `arg` contains no digits, has trailing characters, or is
 * negative.
 */
static uint64_t
bench_parse_count(const char *arg, uint64_t fallback)
{
    char *end = NULL;
    long long value = strtoll(arg, &end, 10);

    if (end == arg || *end != '\0' || value < 0) {
        fprintf(stderr, "ignoring invalid count \"%s\"\n", arg);
        return fallback;
    }
    return (uint64_t)value;
}

/*
 * Benchmark driver.
 *
 * Usage: prog [warmup [repeat]]
 *   warmup - number of full 8192-byte copies performed before measuring
 *            (default 16384)
 *   repeat - number of timed copies per length; the minimum is reported
 *            (default 2048, at least 1)
 *
 * For each length in `lengths` the copy selected by BENCH_MEMCPY is timed
 * `repeat` times with timeit() and the smallest tick count is printed:
 * as total cycles for lengths up to 256 bytes, as cycles per byte above.
 *
 * Always returns 0.
 */
int
main(int ac, char** av)
{
    /* Copy sizes to measure; the trailing 0 terminates the list. */
    static const size_t lengths[] = {16, 64, 256, 1024, 8192, 0};
    ALIGN64 unsigned char buf[8192] = {255};
    ALIGN64 unsigned char out[8192] = {0,};
    size_t i;
    uint64_t j;
    uint64_t ticks, minticks;
    uint64_t warmup = 4096*4;
    uint64_t repeat = 2048;

    if (ac >= 2) warmup = bench_parse_count(av[1], warmup);
    if (ac >= 3) repeat = bench_parse_count(av[2], repeat);
    /* With zero timed runs minticks would stay at its sentinel value. */
    if (repeat == 0) repeat = 1;

    printf("Clock frequency: %.1fGHz\n", osfreq()/1000000000);
    printf("warming up (factor=%llu)... ", (unsigned long long)warmup);
    fflush(stdout);

    for (j = 0; j < warmup; j++) {
        BENCH_MEMCPY(out, buf, 8192);
        /* Fold a copied byte back into the source buffer (presumably so
         * the copy has an observable effect and is not optimized away). */
        buf[j & 8191] += out[j & 8191];
    }

    printf("ok\nbenchmarking (factor=%llu)\n", (unsigned long long)repeat);

    for (i = 0; lengths[i]; i++) {
        minticks = maxticks;
        for (j = 0; j < repeat; j++) {
            timeit(
                BENCH_MEMCPY(out, buf, lengths[i]),
                minticks);
            buf[j & 8191] += out[i & 8191];
        }
        if (lengths[i] <= 256)
            printf(" - %zu bytes, %.0f cycles\n",
                   lengths[i], (double)minticks);
        else
            printf(" - %zu bytes, %.2f cycles/byte\n",
                   lengths[i], (double)minticks / (double)lengths[i]);
    }
    return 0;
}
/*
 * Read one floating-point number from the start of the file at `path`.
 * Returns nonzero and stores the number in *value on success; returns 0
 * (leaving *value untouched) if the file cannot be opened or parsed.
 */
static int osfreq_read_number(const char *path, double *value)
{
    FILE *fp = fopen(path, "r");
    int matched;

    if (!fp)
        return 0;
    matched = fscanf(fp, "%lf", value);
    fclose(fp);
    return matched > 0;
}

/*
 * Scan an open stream for the first place where `pattern` (an fscanf
 * format containing exactly one %lf conversion) matches, discarding the
 * rest of the current line after each failed attempt.
 * Returns the converted number, or 0 once the stream is exhausted.
 */
static double osfreq_scan_lines(FILE *fp, const char *pattern)
{
    double value;

    for (;;) {
        int matched = fscanf(fp, pattern, &value);

        if (matched > 0)
            return value;
        if (matched == 0)
            matched = fscanf(fp, "%*[^\n]\n");
        if (matched < 0)
            return 0;
    }
}

/*
 * Determine the CPU clock frequency in Hz by probing, in order, a series
 * of operating-system sources and returning the first usable answer:
 *
 *   1. /etc/cpucyclespersecond                       (used as is)
 *   2. .../cpu0/cpufreq/scaling_max_freq             (multiplied by 1000)
 *   3. /sys/devices/system/cpu/cpu0/clock_tick       (used as is)
 *   4. "cpu MHz" entry in /proc/cpuinfo              (multiplied by 1e6)
 *   5. "clock" entry in /proc/cpuinfo                (multiplied by 1e6)
 *   6. output of `sysctl hw.cpufrequency`            (used as is, if > 0)
 *   7. output of `lsattr -E -l proc0 -a frequency`   (used as is)
 *   8. output of `psrinfo -v`                        (multiplied by 1e6)
 *
 * Returns 0 when none of the sources yields a value.
 */
static double osfreq(void)
{
    static const char *const cpuinfo_patterns[2] = {
        "cpu MHz : %lf",
        "clock : %lf"
    };
    FILE *fp;
    double freq;
    int matched;
    int k;

    if (osfreq_read_number("/etc/cpucyclespersecond", &freq))
        return freq;

    if (osfreq_read_number("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq", &freq))
        return 1000.0 * freq;

    if (osfreq_read_number("/sys/devices/system/cpu/cpu0/clock_tick", &freq))
        return freq;

    /* /proc/cpuinfo is opened afresh for each of the two entry names. */
    for (k = 0; k < 2; k++) {
        fp = fopen("/proc/cpuinfo", "r");
        if (!fp)
            continue;
        freq = osfreq_scan_lines(fp, cpuinfo_patterns[k]);
        fclose(fp);
        if (freq)
            return 1000000.0 * freq;
    }

    fp = popen("sysctl hw.cpufrequency 2>/dev/null", "r");
    if (fp) {
        matched = fscanf(fp, "hw.cpufrequency: %lf", &freq);
        pclose(fp);
        if (matched > 0 && freq > 0)
            return freq;
    }

    fp = popen("/usr/sbin/lsattr -E -l proc0 -a frequency 2>/dev/null", "r");
    if (fp) {
        matched = fscanf(fp, "frequency %lf", &freq);
        pclose(fp);
        if (matched > 0)
            return freq;
    }

    fp = popen("/usr/sbin/psrinfo -v 2>/dev/null", "r");
    if (fp) {
        freq = osfreq_scan_lines(fp, " The %*s processor operates at %lf MHz");
        pclose(fp);
        if (freq)
            return 1000000.0 * freq;
    }

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment