Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ttsiodras/7b4f4d6948886500cd8fa32fc90dab41 to your computer and use it in GitHub Desktop.
Save ttsiodras/7b4f4d6948886500cd8fa32fc90dab41 to your computer and use it in GitHub Desktop.
Dumb (yet faster!) version of Agner Fog's optimisation example (see https://stackoverflow.com/questions/72306573/speed-miracles-in-x86-amd64-land)
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <x86intrin.h>
#define LEN 1000000
double data[LEN];

/*
 * Fill the global `data` array: slot k receives the value of the
 * quadratic 1.1*k*k + 2.2*k + 3.3 evaluated at its own index k.
 *
 * Takes no arguments and returns nothing; its only effect is to
 * overwrite all LEN elements of `data`.
 */
void compute()
{
    const double quad_coef = 1.1;
    const double lin_coef = 2.2;
    const double const_term = 3.3;
    int k = 0;

    while (k < LEN) {
        /*
         * quad_coef * k is evaluated first, so k is converted to double
         * before it is squared and the product never goes through int
         * arithmetic.
         */
        data[k] = quad_coef * k * k + lin_coef * k + const_term;
        ++k;
    }
}
/*
 * Convert a timespec into a single count of nanoseconds.
 *
 * The arithmetic is done entirely in unsigned 64-bit integers.
 * Multiplying by the floating-point constant 1e9 would route the sum
 * through a double, whose 53-bit mantissa silently rounds any result
 * above 2^53 ns before it is converted back to an integer.
 *
 * ts: timestamp to convert; tv_sec is expected to be non-negative.
 * Returns tv_sec * 1000000000 + tv_nsec.
 */
unsigned long long ts2ns(const struct timespec *ts)
{
    const unsigned long long ns_per_sec = 1000000000ULL;

    return (unsigned long long)ts->tv_sec * ns_per_sec
         + (unsigned long long)ts->tv_nsec;
}
/*
 * Benchmark driver: calls compute() 1000 times, times each call with
 * CLOCK_MONOTONIC_RAW, and prints the shortest observed duration in
 * nanoseconds.
 *
 * argc/argv are accepted but not used.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the clock cannot be read.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    /*
     * Seed the minimum with the largest representable value so the first
     * sample always replaces it, even when a run takes longer than 1 s.
     */
    unsigned long long mini = ~0ULL;

    for (int i = 0; i < 1000; i++) {
        struct timespec t1, t2;

        if (clock_gettime(CLOCK_MONOTONIC_RAW, &t1) != 0) {
            perror("clock_gettime");
            return EXIT_FAILURE;
        }
        compute();
        if (clock_gettime(CLOCK_MONOTONIC_RAW, &t2) != 0) {
            perror("clock_gettime");
            return EXIT_FAILURE;
        }

        unsigned long long diff = ts2ns(&t2) - ts2ns(&t1);
        if (mini > diff) {
            mini = diff;
        }
    }

    /* %llu matches the unsigned long long argument. */
    printf("[-] Took: %llu ns.\n", mini);
    return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment