Skip to content

Instantly share code, notes, and snippets.

@kaja47
Created April 12, 2016 15:01
Show Gist options
  • Save kaja47/16f993ded4e71870932be8789e413c46 to your computer and use it in GitHub Desktop.
Save kaja47/16f993ded4e71870932be8789e413c46 to your computer and use it in GitHub Desktop.
Another hardware mystery
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// Observed run times:
//   STEPS = 4   ->  830 ms
//   STEPS = 16  -> 4850 ms
/*
 * Strided memory-access micro-benchmark.
 *
 * Usage: <program> STEPS
 *
 * Allocates STEPS * 32 KiB, then repeatedly walks the buffer touching one
 * byte every 32 KiB.  The outer iteration count is 1000000000 / STEPS, so
 * the total number of byte accesses stays roughly constant regardless of
 * STEPS.  On completion prints "<STEPS> <elapsed>ms", where the elapsed
 * value is the processor time reported by clock().
 *
 * Returns EXIT_SUCCESS on a completed run, EXIT_FAILURE when the argument
 * is missing or invalid or the buffer cannot be allocated.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s STEPS\n", argc > 0 ? argv[0] : "bench");
        return EXIT_FAILURE;
    }

    /*
     * strtol instead of atoi so that garbage is rejected.  The range check
     * also catches overflow (strtol then returns LONG_MAX/LONG_MIN) and
     * guarantees that iterations below is at least 1 and never divides by 0.
     */
    char *num_end;
    long parsed = strtol(argv[1], &num_end, 10);
    if (num_end == argv[1] || *num_end != '\0' ||
        parsed <= 0 || parsed > 1000000000L) {
        fprintf(stderr, "STEPS must be an integer in 1..1000000000, got '%s'\n",
                argv[1]);
        return EXIT_FAILURE;
    }
    int STEPS = (int)parsed;

    size_t stride = 32 * 1024; // access stride in bytes

    /*
     * calloc checks STEPS * stride for overflow and hands back zeroed
     * memory, so the loads below never read indeterminate values.
     */
    unsigned char *arr = calloc((size_t)STEPS, stride);
    if (arr == NULL) {
        fprintf(stderr, "cannot allocate %d * %zu bytes\n", STEPS, stride);
        return EXIT_FAILURE;
    }
    /* Safe: calloc succeeded, so the product fits in size_t. */
    size_t total = (size_t)STEPS * stride;

    int iterations = 1000000000 / STEPS;
    clock_t cl = clock();
    /* unsigned char: wrap-around on += is fully defined. */
    unsigned char sum = 0;
    for (int i = 0; i < iterations; i++) {
        for (size_t j = 0; j < total; j += stride) {
            sum += arr[j];
            // this prevents gcc from folding both loops
            // and limits speculative execution
            // (CPU can still prefetch data though)
            arr[j] += sum;
        }
    }
    clock_t end = clock();

    /*
     * Make the accumulated value observable so the optimiser cannot discard
     * the loops as dead code now that the buffer is freed.
     */
    volatile unsigned char sink = sum;
    (void)sink;

    printf("%d %fms\n", STEPS, 1000.0 * (end-cl) / CLOCKS_PER_SEC);

    free(arr);
    return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment