Created
June 21, 2023 06:15
-
-
Save cloudwu/cc3cb244323f3bc6f506e4bc0772d057 to your computer and use it in GitHub Desktop.
Compress monotonic 48bits id array, version 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include <stdlib.h> | |
#include <assert.h> | |
#define CHUNK_N 64 | |
struct eid_chunk { | |
uint64_t id[2]; | |
uint8_t v[CHUNK_N]; | |
}; | |
struct eid_hibits { | |
// uint8_t b[5]; // 40bits | |
uint64_t x64; | |
}; | |
struct eid_array { | |
int n; | |
int discrete; | |
int size; | |
int discrete_top; | |
union { | |
struct eid_chunk c[1]; | |
struct eid_hibits d[1]; | |
} u; | |
}; | |
#define DEFAULT_SIZE 64 | |
struct eid_array * eid_append(struct eid_array *E, uint64_t eid); | |
void eid_free(struct eid_array *E); | |
uint64_t eid_index(struct eid_array *E, int n); | |
int eid_count(struct eid_array *E); | |
static inline size_t | |
eid_size_(int size) { | |
size = (size - 1) * sizeof(struct eid_chunk); | |
return sizeof(struct eid_array) + size; | |
} | |
static struct eid_array * | |
eid_new_(int size) { | |
struct eid_array *E = (struct eid_array *)malloc(eid_size_(size)); | |
E->n = 0; | |
E->discrete = 0; | |
E->size = size; | |
E->discrete_top = size * sizeof(struct eid_chunk) / sizeof(struct eid_hibits) - 1; | |
return E; | |
} | |
static inline uint64_t | |
make_index(uint64_t eid, uint8_t c[3]) { | |
// c[0] : number of section 1 (use id[0] as hibits of eid) | |
// c[1] : number of section 1+2 (use id[1] as hibits of eid) | |
// c[2] : number of section 1+2+3 (use id[1]+1 as hibits of eid) | |
return (eid >> 8) << 24 | (c[0] << 16) | (c[1] << 8) | c[2]; | |
} | |
static void | |
copy(struct eid_array *dst, const struct eid_array *src) { | |
int n = dst->n = src->n; | |
int discrete = dst->discrete = src->discrete; | |
int chunk_n = (n + CHUNK_N - 1) / CHUNK_N; | |
if (n > 0) { | |
memcpy(dst->u.c, src->u.c, chunk_n * sizeof(struct eid_chunk)); | |
memcpy(&dst->u.d[dst->discrete_top - discrete + 1], &src->u.d[src->discrete_top - discrete + 1], discrete * sizeof(struct eid_hibits)); | |
} | |
} | |
static inline void | |
init_chunk(struct eid_chunk *c, uint64_t eid) { | |
uint8_t t[3] = { 1, 1, 1 }; | |
c->id[0] = make_index(eid, t); | |
c->v[0] = eid & 0xff; | |
} | |
static inline int | |
same_hibits(uint64_t eid, uint64_t index) { | |
return (eid >> 8) == (index >> 24); | |
} | |
static inline void | |
set_hibits(struct eid_hibits *h, uint64_t eid) { | |
// int i; | |
// for (i=0;i<5;i++) { | |
// eid >>= 8; | |
// h->b[i] = eid & 0xff; | |
// } | |
h->x64 = eid; | |
} | |
static inline uint64_t | |
get_hibits(struct eid_hibits *h) { | |
// int i; | |
// uint64_t r = 0; | |
// for (i=0;i<5;i++) { | |
// r |= h->b[i] << ((i+1) * 8); | |
// } | |
// return r; | |
return h->x64; | |
} | |
struct eid_array * | |
eid_append(struct eid_array *E, uint64_t eid) { | |
if (E == NULL) { | |
E = eid_new_(DEFAULT_SIZE); | |
} | |
if (E->n == 0) { | |
E->n = 1; | |
E->discrete = 0; | |
init_chunk(&E->u.c[0], eid); | |
return E; | |
} | |
int n = E->n++; | |
int chunk_n = n / CHUNK_N; | |
int need_size = chunk_n + (E->discrete + 1) * sizeof(struct eid_hibits) / sizeof(struct eid_chunk) + 2; | |
if (need_size > E->size) { | |
int new_size = E->size * 3 / 2; | |
assert(new_size >= need_size); | |
struct eid_array * tmp = eid_new_(new_size); | |
copy(tmp, E); | |
free(E); | |
E = tmp; | |
} | |
struct eid_chunk * c = &E->u.c[chunk_n]; | |
int idx = n % CHUNK_N; | |
if (idx == 0) { | |
// new chunk | |
init_chunk(c, eid); | |
return E; | |
} | |
c->v[idx] = eid & 0xff; | |
if (same_hibits(eid, c->id[0])) { | |
c->id[0] += 0x010101; | |
return E; | |
} | |
uint8_t sec1 = (c->id[0] >> 16) & 0xff; | |
if (idx == sec1) { | |
// All elements in this chunk is in section 1 | |
c->id[0] += 0x0101; | |
c->id[1] = (eid >> 8) << 24; | |
return E; | |
} | |
assert(idx > sec1); | |
if (same_hibits(eid, c->id[1])) { | |
// In section 2 | |
c->id[0] += 0x0101; | |
return E; | |
} | |
if (same_hibits(eid - 0x100, c->id[1])) { | |
// In section 3 | |
c->id[0] += 0x01; | |
return E; | |
} | |
// In section 4 | |
int discrete = E->discrete++; | |
if (idx == (c->id[0] & 0xff)) { | |
// The first one of section 4 | |
assert(discrete < 0x1000000); // 24bits | |
c->id[1] |= discrete; | |
} | |
set_hibits(&E->u.d[E->discrete_top - discrete], eid); | |
return E; | |
} | |
void | |
eid_free(struct eid_array *E) { | |
free(E); | |
} | |
static inline uint64_t | |
make_eid(uint64_t hi, uint8_t low) { | |
return (hi >> 24) << 8 | low; | |
} | |
uint64_t | |
eid_index(struct eid_array *E, int n) { | |
if (n >= E->n) | |
return 0; | |
int chunk_n = n / CHUNK_N; | |
struct eid_chunk *c = &E->u.c[chunk_n]; | |
int idx = n % CHUNK_N; | |
uint8_t sec3 = c->id[0] & 0xff; | |
if (idx >= sec3) { | |
int offset = c->id[1] & 0xffffff; | |
struct eid_hibits * d = &E->u.d[E->discrete_top - offset - (idx - sec3)]; | |
// return get_hibits(d) | low; | |
return get_hibits(d); | |
} | |
uint8_t low = c->v[idx]; | |
uint8_t sec1 = (c->id[0] >> 16) & 0xff; | |
if (idx < sec1) | |
return make_eid(c->id[0], low); | |
uint8_t sec2 = (c->id[0] >> 8) & 0xff; | |
uint64_t r = make_eid(c->id[1], low); | |
if (idx >= sec2) | |
return r + 0x100; | |
return r; | |
} | |
int | |
eid_count(struct eid_array *E) { | |
return E->n; | |
} | |
#include <stdio.h> | |
void | |
eid_dump(struct eid_array *E, int chunk_id) { | |
struct eid_chunk *c = &E->u.c[chunk_id]; | |
uint8_t t[3] = { (uint8_t)(c->id[0] >> 16) & 0xff, (uint8_t)(c->id[0] >> 8) & 0xff, (uint8_t)c->id[0] & 0xff }; | |
printf("%llx, %llx, [%d %d %d]\n", c->id[0] >> 24, c->id[1] >> 24, | |
t[0], t[1], t[2]); | |
int i; | |
for (i = 0; i< 64; i++) { | |
printf("%02x ", c->v[i]); | |
} | |
printf("\n"); | |
int index = c->id[1] & 0xffffff; | |
printf("Index = %d discrete = %d\n", index, E->discrete); | |
struct eid_hibits * d = &E->u.d[E->discrete_top - index]; | |
for (i = 0; i< 64 - t[2];i++) { | |
printf("%llx ", get_hibits(d - i)); | |
} | |
printf("\n"); | |
} | |
static void | |
test(struct eid_array *E) { | |
int n = eid_count(E); | |
int i; | |
for (i = 0; i < n; i++) { | |
uint64_t id = eid_index(E, i); | |
if (id != (uint64_t)i * 15 + 1) { | |
printf("%d : %lld\n", i, id); | |
} | |
} | |
} | |
struct eid_array2 { | |
int n; | |
uint64_t *v; | |
}; | |
static uint64_t | |
eid_index2(struct eid_array2 *E, int n) { | |
if (n >= E->n) | |
return 0; | |
return E->v[n]; | |
} | |
static void | |
test2(struct eid_array2 *E) { | |
int n = E->n; | |
int i; | |
for (i = 0; i < n; i++) { | |
uint64_t id = eid_index2(E, i); | |
if (id != (uint64_t)i * 15 + 1) { | |
printf("%d : %lld\n", i, id); | |
} | |
} | |
} | |
#include <time.h> | |
int | |
main() { | |
struct eid_array *E = NULL; | |
int i; | |
int n = 10000000; | |
for (i = 0; i < n; i++) { | |
uint64_t eid = (uint64_t)i*15+1; | |
E = eid_append(E, eid); | |
} | |
eid_dump(E, 0); | |
clock_t c = clock(); | |
for (i=0;i<10;i++) | |
test(E); | |
c = clock() - c; | |
printf("time = %d\n", (int)c); | |
eid_free(E); | |
struct eid_array2 E2; | |
E2.n = n; | |
E2.v = (uint64_t *)malloc(n * sizeof(uint64_t)); | |
for (i = 0; i < n; i++) { | |
uint64_t eid = (uint64_t)i*15+1; | |
E2.v[i] = eid; | |
} | |
c = clock(); | |
for (i=0;i<10;i++) | |
test2(&E2); | |
c = clock() - c; | |
printf("time = %d\n", (int)c); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment