-
-
Save chandlerc/0ac03383a56717204806185a580c82e4 to your computer and use it in GitHub Desktop.
Intel ERMSB benchmarking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* bench.h - benchmark harness | |
Written in 2014 by Austin Seipp <[email protected]> | |
To the extent possible under law, the author(s) have dedicated all | |
copyright and related and neighboring rights to this software to | |
the public domain worldwide. This software is distributed without | |
any warranty. | |
You should have received a copy of the CC0 Public Domain Dedication | |
along with this software. If not, see | |
<http://creativecommons.org/publicdomain/zero/1.0/>. | |
*/ | |
#ifndef __BENCH_H__ | |
#define __BENCH_H__ | |
#if defined(USE_LINUX_PERF) && defined(__linux__) | |
#include <unistd.h> | |
#include <sys/syscall.h> | |
#include <linux/perf_event.h> | |
/* perf-event file descriptor for the cycle counter; -1 until it has been
   opened successfully. */
static int ticks_fddev = -1;

/*
 * Return the current value of the hardware CPU-cycle counter, read through
 * the Linux perf_event interface.
 *
 * The counter is opened lazily on the first call and the descriptor is kept
 * for the lifetime of the process.  Returns 0 when the counter cannot be
 * opened or read.
 *
 * Declared static inline, like the non-perf variant of this function, so
 * the header can be included from more than one translation unit.
 */
static inline uint64_t
get_ticks(void)
{
    uint64_t result;

    if (ticks_fddev == -1) {
        /* static storage => every field not set below is zero */
        static struct perf_event_attr attr;

        attr.type = PERF_TYPE_HARDWARE;
        attr.size = sizeof attr;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;

        /* Arguments after &attr are pid=0, cpu=-1, group_fd=-1, flags=0
           (see perf_event_open(2) for their meaning). */
        ticks_fddev = (int)syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (ticks_fddev == -1)
            return 0; /* not available; the next call tries again */
    }

    if (read(ticks_fddev, &result, sizeof result) != (ssize_t)sizeof result)
        return 0;
    return result;
}
#else | |
/*
 * Return the current value of the CPU's free-running tick counter.
 *
 *   - CompCert:      delegates to __builtin_rdtsc().
 *   - x86 / x86-64:  RDTSC; the two 32-bit halves arrive in EAX (low) and
 *                    EDX (high) and are combined into one 64-bit value.
 *   - PowerPC:       the 64-bit time base, read as two 32-bit halves.
 *
 * Any other target is a compile-time error.  No serialising instruction is
 * issued around the read.
 */
static inline uint64_t
get_ticks(void)
{
#if defined(__COMPCERT__)
    return __builtin_rdtsc();
#elif defined(__i386__) || defined(__amd64__)
    uint32_t lo = 0, hi = 0;
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return ((uint64_t)lo | ((uint64_t)hi << 32));
#elif defined(__powerpc__)
    uint32_t hi, lo, hi_check;

    /* The two halves cannot be read atomically.  If the low word carries
       into the high word between the reads the combined value is torn, so
       re-read the high word and retry until it is stable. */
    do {
        __asm__ __volatile__("mftbu %0" : "=r" (hi));
        __asm__ __volatile__("mftb %0" : "=r" (lo));
        __asm__ __volatile__("mftbu %0" : "=r" (hi_check));
    } while (hi != hi_check);
    return ((uint64_t)lo | ((uint64_t)hi << 32));
#else
#error need a get_ticks() function
#endif
}
#endif | |
/*
 * timeit(x, minvar) - time one execution of statement/expression `x`.
 *
 * Stores the elapsed get_ticks() delta in a variable named `ticks`, which
 * the caller must have in scope (uint64_t), and lowers `minvar` to that
 * delta when the delta is smaller.  `minvar` is evaluated more than once,
 * so pass a plain lvalue.
 *
 * Wrapped in do { } while (0) so that `timeit(...);` is exactly one
 * statement and can be used safely as the body of an if/else.
 */
#define timeit(x,minvar) do {            \
    ticks = get_ticks();                 \
    x;                                   \
    ticks = get_ticks() - ticks;         \
    if (ticks < (minvar))                \
        (minvar) = ticks;                \
} while (0)

/* Largest 64-bit unsigned value: the starting point for a running minimum
   passed to timeit(). */
#define maxticks 0xffffffffffffffffull
#include "osfreq.c" | |
#endif /* __BENCH_H__ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include "bench.h" | |
#define NL "\n\t" | |
#define ALIGN64 __attribute__((__aligned__(64))) | |
/*
 * Copy `sz` bytes from `src` to `dst` with a single `rep movsb`, the
 * instruction that Enhanced REP MOVSB/STOSB (ERMSB) hardware accelerates.
 *
 * The three operands are bound directly to the registers the instruction
 * uses (RDI = destination, RSI = source, RCX = count).  They are declared
 * read-write because the instruction advances both pointers and counts RCX
 * down to zero.  The "memory" clobber tells the compiler that the asm reads
 * and writes memory it cannot see through the operands, so loads and stores
 * of the buffers are not cached or moved across the copy.
 *
 * Bytes are copied in ascending address order; this relies on the direction
 * flag being clear, which the x86 calling conventions guarantee on function
 * entry.  A size of 0 copies nothing.
 */
static inline __attribute__((always_inline))
void
ermsb_memcpy(void* dst, const void* src, size_t sz)
{
    __asm__ volatile("rep movsb"
                     : "+D" (dst), "+S" (src), "+c" (sz)
                     : /* no input-only operands */
                     : "memory");
}
#ifdef ERMSB | |
#define BENCH_MEMCPY ermsb_memcpy | |
#else | |
#define BENCH_MEMCPY memcpy | |
#endif | |
int | |
main(int ac, char** av) | |
{ | |
static size_t lengths[] = {16, 64, 256, 1024, 8192, 0}; | |
ALIGN64 unsigned char buf[8192] = {255}; | |
ALIGN64 unsigned char out[8192] = {0,}; | |
size_t i, j; | |
uint64_t ticks, minticks; | |
unsigned char nonce[8] = {0,}; | |
unsigned char key[32] = {0,}; | |
uint64_t warmup = 4096*4; | |
uint64_t repeat = 2048; | |
if (ac >= 2) warmup = atoll(av[1]); | |
if (ac >= 3) repeat = atoll(av[2]); | |
printf("Clock frequency: %.1fgHz\n", osfreq()/1000000000); | |
printf("warming up (factor=%lu)... ", warmup); fflush(stdout); | |
for (i = 0; i < warmup; i++) { | |
BENCH_MEMCPY(out, buf, 8192); | |
buf[i & 8191] += out[i & 8191]; | |
} | |
printf("ok\nbenchmarking (factor=%lu)\n", repeat); | |
for (i = 0; lengths[i]; i++) { | |
minticks = maxticks; | |
for (j = 0; j < repeat; j++) { | |
timeit( | |
BENCH_MEMCPY(out, buf, lengths[i]), | |
minticks); | |
buf[j & 8191] += out[i & 8191]; | |
} | |
if (lengths[i] <= 256) | |
printf(" - %u bytes, %.0f cycles\n", | |
(uint32_t)lengths[i], (double)minticks); | |
else | |
printf(" - %u bytes, %.2f cycles/byte\n", | |
(uint32_t)lengths[i], (double)minticks / lengths[i]); | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Read one floating-point number from the beginning of the file `path`.
 * Returns 1 and stores the number in *value on success, 0 if the file
 * cannot be opened or does not start with a number.
 */
static int osfreq_read_file(const char *path, double *value)
{
    FILE *f = fopen(path, "r");
    int s;

    if (!f) return 0;
    s = fscanf(f, "%lf", value);
    fclose(f);
    return s > 0;
}

/*
 * Scan `f` line by line for the first line matching `fmt`, a scanf format
 * (always a literal supplied by osfreq) containing exactly one %lf.
 * Returns the parsed number, or 0 if no line matches before end of input.
 */
static double osfreq_scan_lines(FILE *f, const char *fmt)
{
    double value;
    int s;

    for (;;) {
        s = fscanf(f, fmt, &value);
        if (s > 0) return value;
        if (s < 0) return 0;
        /* No match: drop the rest of this line, then the newline and any
           whitespace after it.  The two steps are separate calls so that
           an empty line (nothing before the '\n') is still consumed
           instead of stalling the loop. */
        if (fscanf(f, "%*[^\n]") < 0) return 0;
        if (fscanf(f, " ") < 0) return 0;
    }
}

/*
 * Best-effort lookup of the CPU clock frequency.
 *
 * The sources below are tried in order and the first one that yields a
 * value wins; results from the sysfs scaling_max_freq file are multiplied
 * by 1000 and results from the "MHz" sources by 1000000.  Returns 0 when
 * no source produces a value.
 */
static double osfreq(void)
{
    FILE *f;
    double result;
    int s;

    if (osfreq_read_file("/etc/cpucyclespersecond", &result))
        return result;

    if (osfreq_read_file("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq", &result))
        return 1000.0 * result;

    if (osfreq_read_file("/sys/devices/system/cpu/cpu0/clock_tick", &result))
        return result;

    f = fopen("/proc/cpuinfo","r");
    if (f) {
        result = osfreq_scan_lines(f, "cpu MHz : %lf");
        fclose(f);
        if (result) return 1000000.0 * result;
    }

    f = fopen("/proc/cpuinfo","r");
    if (f) {
        result = osfreq_scan_lines(f, "clock : %lf");
        fclose(f);
        if (result) return 1000000.0 * result;
    }

    f = popen("sysctl hw.cpufrequency 2>/dev/null","r");
    if (f) {
        s = fscanf(f,"hw.cpufrequency: %lf",&result);
        pclose(f);
        if (s > 0 && result > 0) return result;
    }

    f = popen("/usr/sbin/lsattr -E -l proc0 -a frequency 2>/dev/null","r");
    if (f) {
        s = fscanf(f,"frequency %lf",&result);
        pclose(f);
        if (s > 0) return result;
    }

    f = popen("/usr/sbin/psrinfo -v 2>/dev/null","r");
    if (f) {
        result = osfreq_scan_lines(f, " The %*s processor operates at %lf MHz");
        pclose(f);
        if (result) return 1000000.0 * result;
    }

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment