Created
May 31, 2018 06:42
-
-
Save nicebyte/16674f71648dfb24daeabec495a37463 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Trying to achieve better cache locality for 2d | |
* arrays using Morton order. | |
* | |
* compile with: | |
* gcc morton.c -O3 -lrt -mbmi2 | |
*/ | |
#define _POSIX_C_SOURCE 199309L | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <time.h> | |
#include <x86intrin.h> | |
typedef struct buffer { | |
uint32_t *data; | |
uint16_t width; | |
} buffer; | |
buffer allocate_buffer(uint16_t width, uint16_t height) { | |
buffer result = { | |
.data = (uint32_t*) malloc(sizeof(uint32_t) * width * height), | |
.width = width, | |
}; | |
return result; | |
} | |
void free_buffer(buffer b) { | |
free(b.data); | |
} | |
/* fast bitwise interleave using parallel bit deposit | |
* (https://www.felixcloutier.com/x86/PDEP.html) */ | |
#define INTERLEAVE_BITS(x, y) (_pdep_u32(x,0x55555555)|_pdep_u32(y,0xaaaaaaaa)) | |
/* read buffer using naive layout */ | |
#define READ_BUF_BASELINE(buf, x, y) (buf.data[y * buf.width + x]) | |
/* read buffer using morton order */ | |
#define READ_BUF_MORTON(buf, x, y) (buf.data[INTERLEAVE_BITS(x, y)]) | |
/* how many instances of the microbenchmark to run*/ | |
#define NBENCHMARKS 1000 | |
int main() { | |
uint64_t results[NBENCHMARKS]; | |
buffer b = allocate_buffer(4096, 4096); | |
struct timespec start, end; | |
volatile uint32_t target; /* prevent compiler from optimizing out the read */ | |
for (uint32_t rid = 0; rid < NBENCHMARKS; ++rid) { | |
clock_gettime(CLOCK_MONOTONIC, &start); | |
for (uint32_t i = 0; i < 4096; ++i) { | |
for (uint32_t j = 1024; j < 3072; ++j) { | |
//target = READ_BUF_BASELINE(b, j, i); | |
target = READ_BUF_MORTON(b, j, i); | |
} | |
} | |
clock_gettime(CLOCK_MONOTONIC, &end); | |
printf("%ld\n", | |
1000000000 * (end.tv_sec - start.tv_sec) + | |
end.tv_nsec - start.tv_nsec); | |
buffer tmp = allocate_buffer(4096, 4096); | |
free_buffer(b); | |
b = tmp; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment