Created
June 21, 2023 02:16
-
-
Save cloudwu/44fd12a14aa77db0b3de1ec2839e1031 to your computer and use it in GitHub Desktop.
Compress monotonic 48bits id array
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <assert.h> | |
#include <stddef.h> | |
#include <stdlib.h> | |
#include <string.h> | |
// .size is size of sec[] | |
// .size * 8 >= .section_n * 8 + .n | |
// 64bit section : | |
// eid is a 48bits id (base 1, 0 is invalid id) | |
// [0 , 24) offset of section (24bits) | |
// [40, 64) high bits of eid (40bits) | |
struct eid_array { | |
int n; | |
int section_n; | |
int size; | |
uint32_t last_position; | |
uint64_t sec[1]; | |
}; | |
#define DEFAULT_SIZE 64 | |
struct eid_array * eid_append(struct eid_array *E, uint64_t eid); | |
void eid_free(struct eid_array *E); | |
uint64_t eid_lookup(struct eid_array *E, int n); | |
int eid_index(struct eid_array *E, uint64_t eid); // todo | |
int eid_index_hint(struct eid_array *E, uint64_t eid, int *hint); // todo | |
int eid_count(struct eid_array *E); | |
size_t eid_size(struct eid_array *E); | |
void eid_clear(struct eid_array *E); | |
void eid_mark_remove(struct eid_array *E, uint64_t eid); // todo | |
void eid_remove(struct eid_array *E, uint64_t from); // todo | |
static inline size_t | |
eid_size_(int size) { | |
size *= sizeof(uint64_t); | |
return sizeof(struct eid_array) - sizeof(uint64_t) + size; | |
} | |
static struct eid_array * | |
eid_new_(int size) { | |
struct eid_array *E = (struct eid_array *)malloc(eid_size_(size)); | |
E->n = 0; | |
E->section_n = 0; | |
E->size = size; | |
E->last_position = 0; | |
return E; | |
} | |
static inline uint8_t * | |
address(const struct eid_array *E, int offset) { | |
uint8_t * top = (uint8_t *)&E->sec[E->size]; | |
return top - offset - 1; | |
} | |
static void | |
copy(struct eid_array *dst, const struct eid_array *src) { | |
dst->n = src->n; | |
dst->section_n = src->section_n; | |
dst->last_position = src->last_position; | |
if (dst->n > 0) { | |
memcpy(dst->sec, src->sec, (src->section_n + 1) * sizeof(uint64_t)); | |
memcpy(address(dst, dst->n-1), address(src, src->n-1), src->n); | |
} | |
} | |
struct eid_array * | |
eid_append(struct eid_array *E, uint64_t eid) { | |
if (E == NULL) { | |
E = eid_new_(DEFAULT_SIZE); | |
} | |
if (E->n == 0) { | |
E->n = 1; | |
E->section_n = 1; | |
E->sec[0] = (eid >> 8) << 24; | |
E->sec[1] = 1; | |
*address(E, 0) = eid & 0xff; | |
return E; | |
} | |
int n = E->n++; | |
if (n >= (E->size - E->section_n - 2) * sizeof(uint64_t)) { | |
struct eid_array * tmp = eid_new_(E->size * 3 / 2); | |
copy(tmp, E); | |
free(E); | |
E = tmp; | |
} | |
uint8_t low = eid & 0xff; | |
uint64_t sec = E->sec[E->section_n-1] >> 24; | |
uint8_t *ptr = address(E, n); | |
*ptr = low; | |
if ((eid >> 8) == sec) { | |
// the same section | |
assert(low > ptr[1]); | |
++E->sec[E->section_n]; | |
} else { | |
// new section | |
uint64_t eid_high = eid >> 8; | |
int sn = E->section_n++; | |
E->sec[sn] = (eid_high << 24) | n; | |
E->sec[sn+1] = n+1; | |
} | |
return E; | |
} | |
void eid_free(struct eid_array *E) { | |
free(E); | |
} | |
static int | |
lookup_section(struct eid_array *E, int from_section, int n) { | |
int begin = from_section; | |
int end = E->section_n; | |
while (begin < end) { | |
int mid = (begin + end) / 2; | |
int offset = E->sec[mid] & 0xffffff; | |
if (offset == n) | |
return mid; | |
else if (offset < n) { | |
begin = mid + 1; | |
} else { | |
end = mid; | |
} | |
} | |
return begin - 1; | |
} | |
static inline uint64_t | |
combine_eid(uint64_t sec, uint8_t low) { | |
return ((sec >> 24) << 8) | low; | |
} | |
uint64_t | |
eid_lookup(struct eid_array *E, int n) { | |
if (n >= E->n) | |
return 0; | |
int sn = E->last_position >> 8; | |
uint64_t sec = E->sec[sn]; | |
int offset = sec & 0xffffff; | |
if (n < offset) { | |
// before last_position | |
E->last_position = 0; | |
return eid_lookup(E, n); | |
} | |
int next = E->sec[sn+1] & 0xffffff; | |
if (n < next) { | |
E->last_position = (sn << 8) | (n - offset); | |
return combine_eid(sec, *address(E, n)); | |
} else { | |
++sn; | |
if (n > next) { | |
sn = lookup_section(E, sn, n); | |
E->last_position = sn << 8 | (n - (E->sec[sn] & 0xffffff)); | |
} else { | |
E->last_position = sn << 8; | |
} | |
return combine_eid(E->sec[sn], *address(E, n)); | |
} | |
} | |
int | |
eid_count(struct eid_array *E) { | |
return E->n; | |
} | |
size_t | |
eid_size(struct eid_array *E) { | |
return eid_size_(E->size); | |
} | |
void | |
eid_clear(struct eid_array *E) { | |
E->n = 0; | |
E->section_n = 0; | |
} | |
#include <stdio.h> | |
static void | |
test(struct eid_array *E) { | |
int n = eid_count(E); | |
int i; | |
for (i = 0; i < n; i++) { | |
uint64_t id = eid_lookup(E, i); | |
if (id != (uint64_t)i * 2 + 1) { | |
printf("%d : %lld\n", i, id); | |
} | |
} | |
} | |
int | |
main() { | |
struct eid_array *E = NULL; | |
int i; | |
int n = 10000000; | |
for (i = 0; i < n; i++) { | |
uint64_t eid = (uint64_t)i*2+1; | |
E = eid_append(E, eid); | |
} | |
test(E); | |
eid_free(E); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment