Skip to content

Instantly share code, notes, and snippets.

@nicebyte
Created May 31, 2018 06:42
Show Gist options
  • Save nicebyte/16674f71648dfb24daeabec495a37463 to your computer and use it in GitHub Desktop.
Save nicebyte/16674f71648dfb24daeabec495a37463 to your computer and use it in GitHub Desktop.
/* Trying to achieve better cache locality for 2d
* arrays using Morton order.
*
* compile with:
* gcc morton.c -O3 -lrt -mbmi2
*/
#define _POSIX_C_SOURCE 199309L
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <x86intrin.h>
typedef struct buffer {
uint32_t *data;
uint16_t width;
} buffer;
buffer allocate_buffer(uint16_t width, uint16_t height) {
buffer result = {
.data = (uint32_t*) malloc(sizeof(uint32_t) * width * height),
.width = width,
};
return result;
}
void free_buffer(buffer b) {
free(b.data);
}
/* fast bitwise interleave using parallel bit deposit
* (https://www.felixcloutier.com/x86/PDEP.html) */
#define INTERLEAVE_BITS(x, y) (_pdep_u32(x,0x55555555)|_pdep_u32(y,0xaaaaaaaa))
/* read buffer using naive layout */
#define READ_BUF_BASELINE(buf, x, y) (buf.data[y * buf.width + x])
/* read buffer using morton order */
#define READ_BUF_MORTON(buf, x, y) (buf.data[INTERLEAVE_BITS(x, y)])
/* how many instances of the microbenchmark to run*/
#define NBENCHMARKS 1000
int main() {
uint64_t results[NBENCHMARKS];
buffer b = allocate_buffer(4096, 4096);
struct timespec start, end;
volatile uint32_t target; /* prevent compiler from optimizing out the read */
for (uint32_t rid = 0; rid < NBENCHMARKS; ++rid) {
clock_gettime(CLOCK_MONOTONIC, &start);
for (uint32_t i = 0; i < 4096; ++i) {
for (uint32_t j = 1024; j < 3072; ++j) {
//target = READ_BUF_BASELINE(b, j, i);
target = READ_BUF_MORTON(b, j, i);
}
}
clock_gettime(CLOCK_MONOTONIC, &end);
printf("%ld\n",
1000000000 * (end.tv_sec - start.tv_sec) +
end.tv_nsec - start.tv_nsec);
buffer tmp = allocate_buffer(4096, 4096);
free_buffer(b);
b = tmp;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment