Skip to content

Instantly share code, notes, and snippets.

@dramforever
Created October 4, 2020 16:19
Show Gist options
  • Save dramforever/9053e6597c0a1731aaa76f3f89379dc3 to your computer and use it in GitHub Desktop.
Save dramforever/9053e6597c0a1731aaa76f3f89379dc3 to your computer and use it in GitHub Desktop.
Inverting a lookup table
sizeof(int) = 4
RAND_MAX = 2147483647
NUM = 536870911
BLOCK_LEN = 2097152
block = 4.859 s
naive = 6.024 s
1 - (block / naive) = 0.193
block = 4.637 s
naive = 5.826 s
1 - (block / naive) = 0.204
block = 4.556 s
naive = 5.803 s
1 - (block / naive) = 0.215
block = 4.550 s
naive = 5.827 s
1 - (block / naive) = 0.219
block = 4.544 s
naive = 5.788 s
1 - (block / naive) = 0.215
block = 4.826 s
naive = 7.558 s
1 - (block / naive) = 0.361
(... more omitted)
#define _GNU_SOURCE
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
// Config
#define NUM (UINT32_MAX / 8)
#define BLOCK_LEN (1 << 21)
#define N_BLOCKS (NUM / BLOCK_LEN)
void invert(uint32_t *in, uint32_t *out) {
uint32_t *tmpx = malloc(NUM * sizeof(uint32_t));
uint32_t *tmpi = malloc(NUM * sizeof(uint32_t));
uint32_t *counter = calloc(N_BLOCKS + 1, sizeof(uint32_t));
for (uint32_t i = 0; i != NUM; i ++) {
counter[1 + in[i] / BLOCK_LEN] ++;
}
for (uint32_t i = 1; i != N_BLOCKS + 1; i ++) {
counter[i] += counter[i - 1];
}
for (uint32_t i = 0; i != NUM; i ++) {
uint32_t place = counter[in[i] / BLOCK_LEN] ++;
tmpx[place] = in[i];
tmpi[place] = i;
}
for (uint32_t i = 0; i != NUM; i ++) {
out[tmpx[i]] = tmpi[i];
}
free(tmpx);
free(tmpi);
free(counter);
}
void check(uint32_t *in, uint32_t *out) {
for (uint32_t i = 0; i != NUM; i ++) {
if (out[i] != 0xffffffffu && in[out[i]] != i) {
fprintf(stderr, "fail %u\n", i);
exit(1);
}
}
}
void bench() {
struct timespec t;
uint32_t *in = malloc(NUM * sizeof(uint32_t));
uint32_t *out = malloc(NUM * sizeof(uint32_t));
for (uint32_t i = 0; i != NUM; i ++) {
in[i] = (uint32_t) rand() % NUM;
}
// Not found = 0xffffffffu
memset(out, 0xff, NUM * sizeof(uint32_t));
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t);
double t_start_block = (double) t.tv_sec + (double) t.tv_nsec / 1e9;
invert(in, out);
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t);
double t_end_block = (double) t.tv_sec + (double) t.tv_nsec / 1e9;
check(in, out);
memset(out, 0xff, NUM * sizeof(uint32_t));
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t);
double t_start_naive = (double) t.tv_sec + (double) t.tv_nsec / 1e9;
for (uint32_t i = 0; i != NUM; i ++) {
out[in[i]] = i;
}
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t);
double t_end_naive = (double) t.tv_sec + (double) t.tv_nsec / 1e9;
check(in, out);
double t_block = t_end_block - t_start_block;
double t_naive = t_end_naive - t_start_naive;
fprintf(stderr, "block = %.3lf s\n", t_block);
fprintf(stderr, "naive = %.3lf s\n", t_naive);
fprintf(stderr, "1 - (block / naive) = %.3lf\n", 1 - (t_block / t_naive));
free(in);
free(out);
}
int main() {
fprintf(stderr, "sizeof(int) = %zu\n", sizeof(int));
fprintf(stderr, "RAND_MAX = %u\n", RAND_MAX);
fprintf(stderr, "NUM = %u\n", NUM);
fprintf(stderr, "BLOCK_LEN = %u\n", BLOCK_LEN);
for (int i = 0; i < 1000; i ++) {
bench();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment