Created
February 7, 2015 10:15
-
-
Save buchgr/3197c55ab96fdc142e49 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// decompress a varint encoded integer | |
uint64_t decompress(uint8_t* mem) { | |
// mask to extract the msb of each individual byte | |
static const uint64_t loMask = 0x8080808080808080; | |
static const uint64_t decompMask[8] = {0x7F, 0x7F7F, | |
0x7F7F7F, 0x7F7F7F7F, | |
0x7F7F7F7F7F, 0x7F7F7F7F7F7F, | |
0x7F7F7F7F7F7F7F, 0x7F7F7F7F7F7F7F7F}; | |
uint64_t val = *reinterpret_cast<uint64_t*>(mem); | |
// most significant bits of each individual byte | |
// of val. | |
uint64_t msbits = 0; | |
__asm__ __volatile__("pext %0, %1, %2" | |
: "=r" (msbits) | |
: "r" (val), "r" (loMask)); | |
// negate msbits so we can use __builtin_ctzll to | |
// count number of trailing zeros aka number of | |
// bytes minus one. | |
int len = __builtin_ctzll(~msbits); | |
uint64_t decompVal = 0; | |
__asm__ __volatile__("pext %0, %1, %2" | |
: "=r" (decompVal) | |
: "r" (val), "r" (decompMask[len])); | |
// TODO: experiment with moving this branch far away in the executable | |
// in order to no pollute the instruction cache. | |
// we are decompressing an actual eight byte integer, which | |
// needs ten bytes (!) in varint encoding. | |
if (len == 8) { | |
// load two more bytes and prepend them to decompVal | |
uint64_t val2 = *reinterpret_cast<uint16_t*>(mem + sizeof(uint64_t)); | |
// swap the 7th and 8th bit (zero based) | |
uint64_t tmp = ((val2 >> 7) ^ (val2 >> 8)) & 1; | |
val2 = val2 ^ ((tmp << 7) | (tmp << 8)); | |
mem += 10; | |
return decompVal | (val2 << 54); | |
} | |
mem += len + 1; | |
return decompVal; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment