Created
October 4, 2020 16:19
-
-
Save dramforever/9053e6597c0a1731aaa76f3f89379dc3 to your computer and use it in GitHub Desktop.
Inverting a lookup table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sizeof(int) = 4 | |
RAND_MAX = 2147483647 | |
NUM = 536870911 | |
BLOCK_LEN = 2097152 | |
block = 4.859 s | |
naive = 6.024 s | |
1 - (block / naive) = 0.193 | |
block = 4.637 s | |
naive = 5.826 s | |
1 - (block / naive) = 0.204 | |
block = 4.556 s | |
naive = 5.803 s | |
1 - (block / naive) = 0.215 | |
block = 4.550 s | |
naive = 5.827 s | |
1 - (block / naive) = 0.219 | |
block = 4.544 s | |
naive = 5.788 s | |
1 - (block / naive) = 0.215 | |
block = 4.826 s | |
naive = 7.558 s | |
1 - (block / naive) = 0.361 | |
(... more omitted) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdio.h> | |
#include <time.h> | |
// Config | |
#define NUM (UINT32_MAX / 8) | |
#define BLOCK_LEN (1 << 21) | |
#define N_BLOCKS (NUM / BLOCK_LEN) | |
void invert(uint32_t *in, uint32_t *out) { | |
uint32_t *tmpx = malloc(NUM * sizeof(uint32_t)); | |
uint32_t *tmpi = malloc(NUM * sizeof(uint32_t)); | |
uint32_t *counter = calloc(N_BLOCKS + 1, sizeof(uint32_t)); | |
for (uint32_t i = 0; i != NUM; i ++) { | |
counter[1 + in[i] / BLOCK_LEN] ++; | |
} | |
for (uint32_t i = 1; i != N_BLOCKS + 1; i ++) { | |
counter[i] += counter[i - 1]; | |
} | |
for (uint32_t i = 0; i != NUM; i ++) { | |
uint32_t place = counter[in[i] / BLOCK_LEN] ++; | |
tmpx[place] = in[i]; | |
tmpi[place] = i; | |
} | |
for (uint32_t i = 0; i != NUM; i ++) { | |
out[tmpx[i]] = tmpi[i]; | |
} | |
free(tmpx); | |
free(tmpi); | |
free(counter); | |
} | |
void check(uint32_t *in, uint32_t *out) { | |
for (uint32_t i = 0; i != NUM; i ++) { | |
if (out[i] != 0xffffffffu && in[out[i]] != i) { | |
fprintf(stderr, "fail %u\n", i); | |
exit(1); | |
} | |
} | |
} | |
void bench() { | |
struct timespec t; | |
uint32_t *in = malloc(NUM * sizeof(uint32_t)); | |
uint32_t *out = malloc(NUM * sizeof(uint32_t)); | |
for (uint32_t i = 0; i != NUM; i ++) { | |
in[i] = (uint32_t) rand() % NUM; | |
} | |
// Not found = 0xffffffffu | |
memset(out, 0xff, NUM * sizeof(uint32_t)); | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t); | |
double t_start_block = (double) t.tv_sec + (double) t.tv_nsec / 1e9; | |
invert(in, out); | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t); | |
double t_end_block = (double) t.tv_sec + (double) t.tv_nsec / 1e9; | |
check(in, out); | |
memset(out, 0xff, NUM * sizeof(uint32_t)); | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t); | |
double t_start_naive = (double) t.tv_sec + (double) t.tv_nsec / 1e9; | |
for (uint32_t i = 0; i != NUM; i ++) { | |
out[in[i]] = i; | |
} | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t); | |
double t_end_naive = (double) t.tv_sec + (double) t.tv_nsec / 1e9; | |
check(in, out); | |
double t_block = t_end_block - t_start_block; | |
double t_naive = t_end_naive - t_start_naive; | |
fprintf(stderr, "block = %.3lf s\n", t_block); | |
fprintf(stderr, "naive = %.3lf s\n", t_naive); | |
fprintf(stderr, "1 - (block / naive) = %.3lf\n", 1 - (t_block / t_naive)); | |
free(in); | |
free(out); | |
} | |
int main() { | |
fprintf(stderr, "sizeof(int) = %zu\n", sizeof(int)); | |
fprintf(stderr, "RAND_MAX = %u\n", RAND_MAX); | |
fprintf(stderr, "NUM = %u\n", NUM); | |
fprintf(stderr, "BLOCK_LEN = %u\n", BLOCK_LEN); | |
for (int i = 0; i < 1000; i ++) { | |
bench(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment