Created
September 21, 2014 05:12
-
-
Save nkurz/985470b01b999e67d04b to your computer and use it in GitHub Desktop.
Sample file showing timings for several alignment implementations.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build:   gcc -fno-inline -std=gnu99 -Wall -O3 align.c -o align -lm -DLIKWID -llikwid -lpthread
// Inspect: objdump -d align | less (to confirm that the code hasn't been optimized out)
// Measure: likwid -m -C2 -g BRANCH align
#include <math.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
/*
 * Build-time knobs; each can be overridden with -D on the compiler command line.
 *
 * ALIGN  - boundary that the align_* functions round up to. align_1 uses a bit
 *          mask, which gives correct results only when ALIGN is a power of two.
 * REPEAT - number of iterations each timed loop in main() performs.
 */
#ifndef ALIGN
#define ALIGN 8
#endif
#ifndef REPEAT
#define REPEAT 1000000
#endif
/*
 * Identity function: returns x unchanged.
 * Timed first in main() as the reference point, i.e. the cost of the loop and
 * the call without any alignment arithmetic.
 */
unsigned int baseline(unsigned int x) {
    return x;
}
unsigned int align_1(unsigned int x) { | |
return (x + ALIGN - 1) & ~(ALIGN - 1); | |
} | |
unsigned int align_2(unsigned int x) { | |
unsigned int boundary = ALIGN; | |
while (x > boundary) { | |
boundary += ALIGN; | |
} | |
return boundary; | |
} | |
unsigned int align_3(unsigned int x) { | |
return ALIGN * ceil((double)x / ALIGN); | |
} | |
/*
 * LIKWID marker API.
 *
 * When built with -DLIKWID the real header is included. Otherwise the marker
 * calls used in main() are replaced by stubs, so the program still builds and
 * runs without the library.
 *
 * The stubs expand to void expressions instead of to nothing: each call stays
 * a well-formed expression statement, and the region name handed to
 * Start/StopRegion counts as used, which avoids "set but not used" warnings
 * under -Wall in builds without LIKWID.
 */
#ifdef LIKWID
#include <likwid.h>
#else
#define likwid_markerInit() ((void)0)
#define likwid_markerThreadInit() ((void)0)
#define likwid_markerStartRegion(name) ((void)(name))
#define likwid_markerStopRegion(name) ((void)(name))
#define likwid_markerClose() ((void)0)
#endif // LIKWID
int main(int argc, char **argv) { | |
likwid_markerInit(); | |
likwid_markerThreadInit(); | |
char *name = "baseline"; | |
likwid_markerStartRegion(name); | |
for (int i = 1; i <= REPEAT; i++) { | |
if (baseline(i) == 0) { | |
printf("Test failed!"); | |
exit(1); | |
} | |
} | |
likwid_markerStopRegion(name); | |
name = "align_1"; | |
likwid_markerStartRegion(name); | |
for (int i = 1; i <= REPEAT; i++) { | |
if (align_1(i) == 0) { | |
printf("Test failed!"); | |
exit(1); | |
} | |
} | |
likwid_markerStopRegion(name); | |
name = "align_2"; | |
likwid_markerStartRegion(name); | |
for (int i = 1; i <= REPEAT; i++) { | |
if (align_2(i) == 0) { | |
printf("Test failed!"); | |
exit(1); | |
} | |
} | |
likwid_markerStopRegion(name); | |
name = "align_3"; | |
likwid_markerStartRegion(name); | |
for (int i = 1; i <= REPEAT; i++) { | |
if (align_3(i) == 0) { | |
printf("Test failed!"); | |
exit(1); | |
} | |
} | |
likwid_markerStopRegion(name); | |
likwid_markerClose(); | |
exit(0); | |
} | |
#if LIKWID_RESULTS | |
------------------------------------------------------------- | |
------------------------------------------------------------- | |
CPU type: Intel Core Haswell processor | |
CPU clock: 3.39 GHz | |
Measuring group BRANCH | |
------------------------------------------------------------- | |
align | |
===================== | |
Region: baseline | |
===================== | |
+-------------------+------------+ | |
| Region Info | core 2 | | |
+-------------------+------------+ | |
| RDTSC Runtime [s] | 0.00157839 | | |
| call count | 1 | | |
+-------------------+------------+ | |
+------------------------------+-------------+ | |
| Event | core 2 | | |
+------------------------------+-------------+ | |
| INSTR_RETIRED_ANY | 9.00103e+06 | | |
| CPU_CLK_UNHALTED_CORE | 5.3404e+06 | | |
| CPU_CLK_UNHALTED_REF | 5.34018e+06 | | |
| BR_INST_RETIRED_ALL_BRANCHES | 4.00032e+06 | | |
| BR_MISP_RETIRED_ALL_BRANCHES | 47 | | |
+------------------------------+-------------+ | |
+----------------------------+-------------+ | |
| Metric | core 2 | | |
+----------------------------+-------------+ | |
| Runtime (RDTSC) [s] | 0.00157839 | | |
| Runtime unhalted [s] | 0.00157435 | | |
| Clock [MHz] | 3392.28 | | |
| CPI | 0.59331 | | |
| Branch rate | 0.444429 | | |
| Branch misprediction rate | 5.22163e-06 | | |
| Branch misprediction ratio | 1.17491e-05 | | |
| Instructions per branch | 2.25008 | | |
+----------------------------+-------------+ | |
===================== | |
Region: align_1 | |
===================== | |
+-------------------+-----------+ | |
| Region Info | core 2 | | |
+-------------------+-----------+ | |
| RDTSC Runtime [s] | 0.0014741 | | |
| call count | 1 | | |
+-------------------+-----------+ | |
+------------------------------+-------------+ | |
| Event | core 2 | | |
+------------------------------+-------------+ | |
| INSTR_RETIRED_ANY | 1.0001e+07 | | |
| CPU_CLK_UNHALTED_CORE | 5.00746e+06 | | |
| CPU_CLK_UNHALTED_REF | 5.00752e+06 | | |
| BR_INST_RETIRED_ALL_BRANCHES | 4.00031e+06 | | |
| BR_MISP_RETIRED_ALL_BRANCHES | 40 | | |
+------------------------------+-------------+ | |
+----------------------------+-------------+ | |
| Metric | core 2 | | |
+----------------------------+-------------+ | |
| Runtime (RDTSC) [s] | 0.0014741 | | |
| Runtime unhalted [s] | 0.0014762 | | |
| Clock [MHz] | 3392.1 | | |
| CPI | 0.500695 | | |
| Branch rate | 0.399991 | | |
| Branch misprediction rate | 3.99959e-06 | | |
| Branch misprediction ratio | 9.99922e-06 | | |
| Instructions per branch | 2.50006 | | |
+----------------------------+-------------+ | |
===================== | |
Region: align_2 | |
===================== | |
+-------------------+---------+ | |
| Region Info | core 2 | | |
+-------------------+---------+ | |
| RDTSC Runtime [s] | 18.4446 | | |
| call count | 1 | | |
+-------------------+---------+ | |
+------------------------------+-------------+ | |
| Event | core 2 | | |
+------------------------------+-------------+ | |
| INSTR_RETIRED_ANY | 1.8751e+11 | | |
| CPU_CLK_UNHALTED_CORE | 6.25503e+10 | | |
| CPU_CLK_UNHALTED_REF | 6.25503e+10 | | |
| BR_INST_RETIRED_ALL_BRANCHES | 6.25045e+10 | | |
| BR_MISP_RETIRED_ALL_BRANCHES | 1.0001e+06 | | |
+------------------------------+-------------+ | |
+----------------------------+-------------+ | |
| Metric | core 2 | | |
+----------------------------+-------------+ | |
| Runtime (RDTSC) [s] | 18.4446 | | |
| Runtime unhalted [s] | 18.4398 | | |
| Clock [MHz] | 3392.14 | | |
| CPI | 0.333583 | | |
| Branch rate | 0.333339 | | |
| Branch misprediction rate | 5.33357e-06 | | |
| Branch misprediction ratio | 1.60004e-05 | | |
| Instructions per branch | 2.99995 | | |
+----------------------------+-------------+ | |
===================== | |
Region: align_3 | |
===================== | |
+-------------------+------------+ | |
| Region Info | core 2 | | |
+-------------------+------------+ | |
| RDTSC Runtime [s] | 0.00590322 | | |
| call count | 1 | | |
+-------------------+------------+ | |
+------------------------------+-------------+ | |
| Event | core 2 | | |
+------------------------------+-------------+ | |
| INSTR_RETIRED_ANY | 1.90017e+07 | | |
| CPU_CLK_UNHALTED_CORE | 2.00174e+07 | | |
| CPU_CLK_UNHALTED_REF | 2.00175e+07 | | |
| BR_INST_RETIRED_ALL_BRANCHES | 7.00045e+06 | | |
| BR_MISP_RETIRED_ALL_BRANCHES | 64 | | |
+------------------------------+-------------+ | |
+----------------------------+-------------+ | |
| Metric | core 2 | | |
+----------------------------+-------------+ | |
| Runtime (RDTSC) [s] | 0.00590322 | | |
| Runtime unhalted [s] | 0.0059011 | | |
| Clock [MHz] | 3392.12 | | |
| CPI | 1.05345 | | |
| Branch rate | 0.368412 | | |
| Branch misprediction rate | 3.36812e-06 | | |
| Branch misprediction ratio | 9.14227e-06 | | |
| Instructions per branch | 2.71435 | | |
+----------------------------+-------------+ | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment