Last active
January 8, 2022 17:31
-
-
Save binji/e70270f1e385ddb91c0a to your computer and use it in GitHub Desktop.
benchmark for various varint formats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <stddef.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/time.h> | |
#include "benchmark/benchmark.h" | |
#define LEB128 1 | |
#define PREFIX_VARINT 1 | |
#define PREFIX_VARINT_LSB 1 | |
#define MAIN 0 | |
#define ALL_U32S 0 | |
#define RUN_BENCHMARK 1 | |
#if MAIN || ALL_U32S | |
#define DECODE_LEB128_NAME decode | |
#define DECODE_PREFIX_VARINT_NAME decode | |
#define DECODE_PREFIX_VARINT_LSB_NAME decode | |
#define ENCODE_LEB128_NAME encode | |
#define ENCODE_PREFIX_VARINT_NAME encode | |
#define ENCODE_PREFIX_VARINT_LSB_NAME encode | |
#endif | |
#if RUN_BENCHMARK | |
#define DECODE_LEB128_NAME decode_leb128 | |
#define DECODE_PREFIX_VARINT_NAME decode_prefix_varint | |
#define DECODE_PREFIX_VARINT_LSB_NAME decode_prefix_varint_lsb | |
#define ENCODE_LEB128_NAME encode_leb128 | |
#define ENCODE_PREFIX_VARINT_NAME encode_prefix_varint | |
#define ENCODE_PREFIX_VARINT_LSB_NAME encode_prefix_varint_lsb | |
#endif | |
#if LEB128 | |
int ENCODE_LEB128_NAME(uint8_t* data, uint32_t value) { | |
int i = 0; | |
do { | |
uint8_t byte = value & 0x7f; | |
value >>= 7; | |
if (value == 0) { | |
data[i++] = byte; | |
break; | |
} else { | |
data[i++] = byte | 0x80; | |
} | |
} while (1); | |
return i; | |
} | |
uint32_t DECODE_LEB128_NAME(uint8_t* data, int* length) { | |
if (__builtin_expect((data[0] & 0x80) == 0, 1)) { | |
*length = 1; | |
return data[0]; | |
} else if ((data[1] & 0x80) == 0) { | |
*length = 2; | |
return (data[1] << 7) | (data[0] & 0x7f); | |
} else if ((data[2] & 0x80) == 0) { | |
*length = 3; | |
return (data[2] << 14) | ((data[1] & 0x7f) << 7) | (data[0] & 0x7f); | |
} else if ((data[3] & 0x80) == 0) { | |
*length = 4; | |
return (data[3] << 21) | ((data[2] & 0x7f) << 14) | | |
((data[1] & 0x7f) << 7) | (data[0] & 0x7f); | |
} else if ((data[4] & 0x80) == 0) { | |
*length = 5; | |
return (data[4] << 28) | ((data[3] & 0x7f) << 21) | | |
((data[2] & 0x7f) << 14) | ((data[1] & 0x7f) << 7) | | |
(data[0] & 0x7f); | |
} | |
} | |
#endif | |
#if PREFIX_VARINT | |
int ENCODE_PREFIX_VARINT_NAME(uint8_t* data, uint32_t value) { | |
if (value < (1 << 7)) { | |
data[0] = value; | |
return 1; | |
} else if (value < (1 << 14)) { | |
data[0] = 0x80 | (value & 0x3f); | |
data[1] = value >> 6; | |
return 2; | |
} else if (value < (1 << 21)) { | |
data[0] = 0xc0 | (value & 0x1f); | |
value >>= 5; | |
memcpy(&data[1], &value, 2); | |
return 3; | |
} else if (value < (1 << 28)) { | |
data[0] = 0xe0 | (value & 0x0f); | |
value >>= 4; | |
memcpy(&data[1], &value, 3); | |
return 4; | |
} else { | |
data[0] = 0xf0 | (value & 0x07); | |
value >>= 3; | |
memcpy(&data[1], &value, 4); | |
return 5; | |
} | |
} | |
uint32_t DECODE_PREFIX_VARINT_NAME(uint8_t* data, int* length) { | |
uint8_t byte = *data; | |
if (__builtin_expect((byte & 0x80) == 0, 1)) { | |
*length = 1; | |
return byte >> 1; | |
} | |
uint32_t result; | |
switch (__builtin_clz(~byte & 0xff)) { | |
case sizeof(int) * 8 - 7: | |
result = data[1] << 6; | |
result |= (byte & 0x3f); | |
*length = 2; | |
break; | |
case sizeof(int) * 8 - 6: | |
result = 0; | |
memcpy(&result, &data[1], 2); | |
result <<= 5; | |
result |= (byte & 0x1f); | |
*length = 3; | |
break; | |
case sizeof(int) * 8 - 5: | |
result = 0; | |
memcpy(&result, &data[1], 3); | |
result <<= 4; | |
result |= (byte & 0x0f); | |
*length = 4; | |
break; | |
case sizeof(int) * 8 - 4: | |
result = 0; | |
memcpy(&result, &data[1], 4); | |
result <<= 3; | |
result |= (byte & 0x07); | |
*length = 5; | |
break; | |
default: | |
assert(0); | |
} | |
return result; | |
} | |
#endif | |
#if PREFIX_VARINT_LSB | |
int ENCODE_PREFIX_VARINT_LSB_NAME(uint8_t* data, uint32_t value) { | |
if (value < (1 << 7)) { | |
data[0] = value << 1; | |
return 1; | |
} else if (value < (1 << 14)) { | |
data[0] = ((value & 0x3f) << 2) | 0x1; | |
data[1] = value >> 6; | |
return 2; | |
} else if (value < (1 << 21)) { | |
data[0] = ((value & 0x1f) << 3) | 0x3; | |
value >>= 5; | |
memcpy(&data[1], &value, 2); | |
return 3; | |
} else if (value < (1 << 28)) { | |
data[0] = ((value & 0x0f) << 4) | 0x7; | |
value >>= 4; | |
memcpy(&data[1], &value, 3); | |
return 4; | |
} else { | |
data[0] = ((value & 0x07) << 5) | 0xf; | |
value >>= 3; | |
memcpy(&data[1], &value, 4); | |
return 5; | |
} | |
} | |
uint32_t DECODE_PREFIX_VARINT_LSB_NAME(uint8_t* data, int* length) { | |
uint8_t byte = *data; | |
if (__builtin_expect((byte & 1) == 0, 1)) { | |
*length = 1; | |
return byte >> 1; | |
} | |
uint32_t result; | |
switch (__builtin_ctz(~*data)) { | |
case 1: | |
result = 0; | |
memcpy(&result, data, 2); | |
result >>= 2; | |
*length = 2; | |
break; | |
case 2: | |
result = 0; | |
memcpy(&result, data, 3); | |
result >>= 3; | |
*length = 3; | |
break; | |
case 3: | |
result = 0; | |
memcpy(&result, data, 4); | |
result >>= 4; | |
*length = 4; | |
break; | |
case 4: | |
memcpy(&result, &data[1], 4); | |
result <<= 3; | |
result |= (byte >> 5) & 0x7; | |
*length = 5; | |
break; | |
default: | |
assert(0); | |
} | |
return result; | |
} | |
#endif | |
#if MAIN | |
int main(int argc, char** argv) { | |
--argc; ++argv; | |
for (; argc > 0; --argc, ++argv) { | |
char* endptr; | |
unsigned long long result = strtoull(*argv, &endptr, 10); | |
uint8_t buffer[5]; | |
size_t len = encode(buffer, result); | |
printf("%llu: ", result); | |
int i = 0; | |
switch (len) { | |
case 5:printf("%02x ", buffer[i++]); | |
case 4:printf("%02x ", buffer[i++]); | |
case 3:printf("%02x ", buffer[i++]); | |
case 2:printf("%02x ", buffer[i++]); | |
case 1: printf("%02x", buffer[i++]); | |
} | |
uint32_t decoded_len; | |
uint32_t decoded = decode(buffer, &decoded_len); | |
printf(" -> %d\n", decoded); | |
} | |
return 0; | |
} | |
#endif | |
#if ALL_U32S | |
int main() { | |
uint8_t buffer[5]; | |
uint32_t i = 0; | |
do { | |
int elen = encode(buffer, i); | |
int dlen; | |
uint32_t result = decode(buffer, &dlen); | |
assert(elen == dlen); | |
assert(i == result); | |
++i; | |
} while (i != 0); | |
} | |
#endif | |
float random01(void) { | |
return (float)rand() / RAND_MAX; | |
} | |
uint32_t random_int(uint32_t min, uint32_t max) { | |
return (uint32_t)(random01() * (max - min)) + min; | |
} | |
uint32_t random_value(int byte_size) { | |
switch (byte_size) { | |
case 1: return random_int(0, (1 << 7) - 1); | |
case 2: return random_int(1 << 7, (1 << 14) - 1); | |
case 3: return random_int(1 << 14, (1 << 21) - 1); | |
case 4: return random_int(1 << 21, (1 << 28) - 1); | |
case 5: return random_int(1 << 28, UINT32_MAX); | |
default: | |
assert(0); | |
return 0; | |
} | |
} | |
typedef int (*encoder_func)(uint8_t*, uint32_t); | |
void randomize_buffer(encoder_func encode, | |
uint8_t* buffer, | |
int count, | |
int byte_size) { | |
for (int i = 0; i < count; ++i) { | |
int len = encode(buffer, random_value(byte_size)); | |
assert(len == byte_size); | |
buffer += len; | |
} | |
} | |
#if RUN_BENCHMARK | |
#define COUNT 10000 | |
#define BUF_SIZE (5 * COUNT) | |
void benchmark_decode_leb128(benchmark::State& state) { | |
uint8_t buffer[BUF_SIZE]; | |
uint8_t* p; | |
randomize_buffer(encode_leb128, buffer, COUNT, state.range_x()); | |
while (state.KeepRunning()) { | |
p = buffer; | |
for (int i = 0; i < COUNT; ++i) { | |
int length; | |
decode_leb128(p, &length); | |
assert(length == state.range_x()); | |
p += length; | |
} | |
} | |
} | |
BENCHMARK(benchmark_decode_leb128)->Arg(1)->Arg(2)->Arg(3)->Arg(4)->Arg(5); | |
void benchmark_decode_prefix_varint(benchmark::State& state) { | |
uint8_t buffer[BUF_SIZE]; | |
uint8_t* p; | |
randomize_buffer(encode_prefix_varint, buffer, COUNT, state.range_x()); | |
while (state.KeepRunning()) { | |
p = buffer; | |
for (int i = 0; i < COUNT; ++i) { | |
int length; | |
decode_prefix_varint(p, &length); | |
assert(length == state.range_x()); | |
p += length; | |
} | |
} | |
} | |
BENCHMARK(benchmark_decode_prefix_varint)->Arg(1)->Arg(2)->Arg(3)->Arg(4)->Arg(5); | |
void benchmark_decode_prefix_varint_lsb(benchmark::State& state) { | |
uint8_t buffer[BUF_SIZE]; | |
uint8_t* p; | |
randomize_buffer(encode_prefix_varint_lsb, buffer, COUNT, state.range_x()); | |
while (state.KeepRunning()) { | |
p = buffer; | |
for (int i = 0; i < COUNT; ++i) { | |
int length; | |
decode_prefix_varint_lsb(p, &length); | |
assert(length == state.range_x()); | |
p += length; | |
} | |
} | |
} | |
BENCHMARK(benchmark_decode_prefix_varint_lsb)->Arg(1)->Arg(2)->Arg(3)->Arg(4)->Arg(5); | |
BENCHMARK_MAIN() | |
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Run on (4 X 3179.92 MHz CPU s) | |
2016-03-11 22:13:23 | |
Benchmark Time(ns) CPU(ns) Iterations | |
--------------------------------------------------------------------- | |
benchmark_decode_leb128/1 5142 6327 109375 | |
benchmark_decode_leb128/2 6918 8498 79545 | |
benchmark_decode_leb128/3 9039 11136 62500 | |
benchmark_decode_leb128/4 10673 13125 53030 | |
benchmark_decode_leb128/5 12849 15726 43750 | |
benchmark_decode_prefix_varint/1 5142 6290 109375 | |
benchmark_decode_prefix_varint/2 12177 14969 47297 | |
benchmark_decode_prefix_varint/3 17526 14235 48611 | |
benchmark_decode_prefix_varint/4 14329 17600 39773 | |
benchmark_decode_prefix_varint/5 21749 16807 40698 | |
benchmark_decode_prefix_varint_lsb/1 5158 6327 109375 | |
benchmark_decode_prefix_varint_lsb/2 17708 14235 48611 | |
benchmark_decode_prefix_varint_lsb/3 11632 14318 48611 | |
benchmark_decode_prefix_varint_lsb/4 16035 12727 54687 | |
benchmark_decode_prefix_varint_lsb/5 14262 17520 50000 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment