Skip to content

Instantly share code, notes, and snippets.

@buchgr
Created February 7, 2015 10:15
Show Gist options
  • Save buchgr/3197c55ab96fdc142e49 to your computer and use it in GitHub Desktop.
Save buchgr/3197c55ab96fdc142e49 to your computer and use it in GitHub Desktop.
// decompress a varint encoded integer
uint64_t decompress(uint8_t* mem) {
// mask to extract the msb of each individual byte
static const uint64_t loMask = 0x8080808080808080;
static const uint64_t decompMask[8] = {0x7F, 0x7F7F,
0x7F7F7F, 0x7F7F7F7F,
0x7F7F7F7F7F, 0x7F7F7F7F7F7F,
0x7F7F7F7F7F7F7F, 0x7F7F7F7F7F7F7F7F};
uint64_t val = *reinterpret_cast<uint64_t*>(mem);
// most significant bits of each individual byte
// of val.
uint64_t msbits = 0;
__asm__ __volatile__("pext %0, %1, %2"
: "=r" (msbits)
: "r" (val), "r" (loMask));
// negate msbits so we can use __builtin_ctzll to
// count number of trailing zeros aka number of
// bytes minus one.
int len = __builtin_ctzll(~msbits);
uint64_t decompVal = 0;
__asm__ __volatile__("pext %0, %1, %2"
: "=r" (decompVal)
: "r" (val), "r" (decompMask[len]));
// TODO: experiment with moving this branch far away in the executable
// in order to no pollute the instruction cache.
// we are decompressing an actual eight byte integer, which
// needs ten bytes (!) in varint encoding.
if (len == 8) {
// load two more bytes and prepend them to decompVal
uint64_t val2 = *reinterpret_cast<uint16_t*>(mem + sizeof(uint64_t));
// swap the 7th and 8th bit (zero based)
uint64_t tmp = ((val2 >> 7) ^ (val2 >> 8)) & 1;
val2 = val2 ^ ((tmp << 7) | (tmp << 8));
mem += 10;
return decompVal | (val2 << 54);
}
mem += len + 1;
return decompVal;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment