Created
March 1, 2020 16:03
-
-
Save arnetheduck/d665107931b1ee5a29214df5f96c6ccd to your computer and use it in GitHub Desktop.
work in progress varint codec
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Little-endian base 128 variable length integer utilities, as seen in | |
# https://en.wikipedia.org/wiki/LEB128 | |
# | |
# Used in formats like DWARF, WASM - unsigned variant identical to protobuf/go | |
# varint. Exception/Defect free for WASM use | |
import | |
stew/[bitops2, result] | |
const | |
# Given the truncated logarithm of a number, how many bytes do we need to | |
# encode it? | |
lengths = block: | |
var v: array[64, int8] | |
for i in 0..<64: | |
v[i] = int8((i + 7) div 7) | |
v | |
func leb128Len*(x: SomeUnsignedInt): int8 {.inline, raises: [].} = | |
## Returns number of bytes required to encode integer ``x`` as varint. | |
if x == 0: 1 | |
else: lengths[log2trunc(x)] | |
func maxLeb128Len*(T: type): int8 {.inline, raises: [].} = leb128Len(T.high) | |
type | |
LebBuf*[T: SomeUnsignedInt] = object | |
data*: array[maxLeb128Len(T), byte] # len(data) <= 10 | |
len*: int8 # >= 1 when holding valid protobuf | |
LebError* = enum | |
incomplete = "End marker not found in given stream" | |
overflow = "Value would overflow" | |
template write7(next: untyped) = | |
if v > type(v)(127): | |
result.data[result.len] = cast[byte](v) or 0x80'u8 | |
result.len += 1 | |
v = v shr 7 | |
next | |
func toBytesLeb128*(v: SomeUnsignedInt): LebBuf[typeof(v)] {.noinit, raises: [].} = | |
var | |
v = v | |
# A really clever person would write a macro for this expansion, and then only | |
# really clever people would be able to read it. | |
# There is something clever about this code however: a clever compiler will | |
# use type information to cleverly optimze away the dead code, which is one of | |
# the clever reasons toBytes is generic | |
write7(): # 7 | |
write7(): # 14 | |
write7(): # 21 | |
write7(): # 28 | |
write7(): # 35 | |
write7(): # 42 | |
write7(): # 49 | |
write7(): # 56 | |
write7(): # 63 | |
discard | |
result.data[result.len] = cast[byte](v) # high bit not set since v <= 127 at this point! | |
result.len += 1 | |
template read7(shift: untyped) = | |
if (shift div 7) >= x.len(): | |
return err(incomplete) | |
let | |
b = x[shift div 7] | |
valb = b and 0x7f'u8 | |
val = T(valb) | |
vals = val shl shift | |
if vals shr shift != val: | |
return err(overflow) | |
res = res or vals | |
if b == valb: # High bit not set | |
return ok((res, int8((shift div 7) + 1))) | |
func fromBytesLeb128*( | |
T: typedesc[SomeUnsignedInt], | |
x: openArray[byte]): Result[tuple[val: T, len: int8], LebError] {.raises: [].} = | |
## Parse a LEB128 byte sequence and return value and how many bytes were | |
## parsed | |
var | |
res: T | |
read7(0) | |
read7(7) | |
read7(14) | |
read7(21) | |
read7(28) | |
read7(35) | |
read7(42) | |
read7(49) | |
read7(56) | |
read7(63) | |
err(incomplete) | |
template toOpenArray*(v: LebBuf): openArray[byte] = | |
toOpenArray(v.data, 0, v.len - 1) | |
template len*(v: LebBuf): auto = v.len | |
template `@`*(v: LebBuf): seq[byte] = | |
@v.toOpenArray() | |
func scanLeb128*( | |
T: typedesc[SomeUnsignedInt], | |
x: openArray[byte]): Result[int8, LebError] = | |
## Scan a buffer for a valid leb128-encoded value that at most fits in a | |
## uint64, and report how many bytes it uses | |
# TODO this can be done efficiently with SSE | |
T.fromBytesLeb128(x).map(proc(a: auto): auto = a.len) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment