arnetheduck · March 1, 2020 16:03
diff --git a/leb128.nim b/leb128.nim
 # Little-endian base 128 variable length integer utilities, as seen in
 # https://en.wikipedia.org/wiki/LEB128
 #
 # Used in formats like DWARF, WASM - unsigned variant identical to protobuf/go
 # varint. Exception/Defect free for WASM use

 import
  stew/[bitops2, result]

 const
  # Given the truncated logarithm of a number, how many bytes do we need to
  # encode it?
  lengths = block:
    var v: array[64, int8]
    for i in 0..<64:
      v[i] = int8((i + 7) div 7)
    v

 func leb128Len*(x: SomeUnsignedInt): int8 {.inline, raises: [].} =
  ## Returns number of bytes required to encode integer ``x`` as varint.
  if x == 0: 1
  else: lengths[log2trunc(x)]

 func maxLeb128Len*(T: type): int8 {.inline, raises: [].} = leb128Len(T.high)

 type
  LebBuf*[T: SomeUnsignedInt] = object
    data*: array[maxLeb128Len(T), byte] # len(data) <= 10
    len*: int8 # >= 1 when holding valid protobuf

  LebError* = enum
    incomplete = "End marker not found in given stream"
    overflow = "Value would overflow"

 template write7(next: untyped) =
  if v > type(v)(127):
    result.data[result.len] = cast[byte](v) or 0x80'u8
    result.len += 1
    v = v shr 7
    next

 func toBytesLeb128*(v: SomeUnsignedInt): LebBuf[typeof(v)] {.noinit, raises: [].} =
  var
    v = v

  # A really clever person would write a macro for this expansion, and then only
  # really clever people would be able to read it.
  # There is something clever about this code however: a clever compiler will
  # use type information to cleverly optimze away the dead code, which is one of
  # the clever reasons toBytes is generic
  write7(): # 7
    write7(): # 14
      write7(): # 21
        write7(): # 28
          write7(): # 35
            write7(): # 42
              write7(): # 49
                write7(): # 56
                  write7(): # 63
                    discard

  result.data[result.len] = cast[byte](v) # high bit not set since v <= 127 at this point!
  result.len += 1

 template read7(shift: untyped) =
  if (shift div 7) >= x.len():
    return err(incomplete)
  let
    b = x[shift div 7]
    valb = b and 0x7f'u8
    val = T(valb)
    vals = val shl shift

  if vals shr shift != val:
    return err(overflow)

  res = res or vals
  if b == valb: # High bit not set
    return ok((res, int8((shift div 7) + 1)))

 func fromBytesLeb128*(
    T: typedesc[SomeUnsignedInt],
    x: openArray[byte]): Result[tuple[val: T, len: int8], LebError] {.raises: [].} =
  ## Parse a LEB128 byte sequence and return value and how many bytes were
  ## parsed
  var
    res: T

  read7(0)
  read7(7)
  read7(14)
  read7(21)
  read7(28)
  read7(35)
  read7(42)
  read7(49)
  read7(56)
  read7(63)

  err(incomplete)

 template toOpenArray*(v: LebBuf): openArray[byte] =
  toOpenArray(v.data, 0, v.len - 1)

 template len*(v: LebBuf): auto = v.len

 template `@`*(v: LebBuf): seq[byte] =
  @v.toOpenArray()

 func scanLeb128*(
    T: typedesc[SomeUnsignedInt],
    x: openArray[byte]): Result[int8, LebError] =
  ## Scan a buffer for a valid leb128-encoded value that at most fits in a
  ## uint64, and report how many bytes it uses
  # TODO this can be done efficiently with SSE
  T.fromBytesLeb128(x).map(proc(a: auto): auto = a.len)
	# Little-endian base 128 variable length integer utilities, as seen in
	# https://en.wikipedia.org/wiki/LEB128
	#
	# Used in formats like DWARF, WASM - unsigned variant identical to protobuf/go
	# varint. Exception/Defect free for WASM use

	import
	stew/[bitops2, result]

	const
	# Given the truncated logarithm of a number, how many bytes do we need to
	# encode it?
	lengths = block:
	var v: array[64, int8]
	for i in 0..<64:
	v[i] = int8((i + 7) div 7)
	v

	func leb128Len*(x: SomeUnsignedInt): int8 {.inline, raises: [].} =
	## Returns number of bytes required to encode integer ``x`` as varint.
	if x == 0: 1
	else: lengths[log2trunc(x)]

	func maxLeb128Len*(T: type): int8 {.inline, raises: [].} = leb128Len(T.high)

	type
	LebBuf*[T: SomeUnsignedInt] = object
	data*: array[maxLeb128Len(T), byte] # len(data) <= 10
	len*: int8 # >= 1 when holding valid protobuf

	LebError* = enum
	incomplete = "End marker not found in given stream"
	overflow = "Value would overflow"

	template write7(next: untyped) =
	if v > type(v)(127):
	result.data[result.len] = cast[byte](v) or 0x80'u8
	result.len += 1
	v = v shr 7
	next

	func toBytesLeb128*(v: SomeUnsignedInt): LebBuf[typeof(v)] {.noinit, raises: [].} =
	var
	v = v

	# A really clever person would write a macro for this expansion, and then only
	# really clever people would be able to read it.
	# There is something clever about this code however: a clever compiler will
	# use type information to cleverly optimze away the dead code, which is one of
	# the clever reasons toBytes is generic
	write7(): # 7
	write7(): # 14
	write7(): # 21
	write7(): # 28
	write7(): # 35
	write7(): # 42
	write7(): # 49
	write7(): # 56
	write7(): # 63
	discard

	result.data[result.len] = cast[byte](v) # high bit not set since v <= 127 at this point!
	result.len += 1

	template read7(shift: untyped) =
	if (shift div 7) >= x.len():
	return err(incomplete)
	let
	b = x[shift div 7]
	valb = b and 0x7f'u8
	val = T(valb)
	vals = val shl shift

	if vals shr shift != val:
	return err(overflow)

	res = res or vals
	if b == valb: # High bit not set
	return ok((res, int8((shift div 7) + 1)))

	func fromBytesLeb128*(
	T: typedesc[SomeUnsignedInt],
	x: openArray[byte]): Result[tuple[val: T, len: int8], LebError] {.raises: [].} =
	## Parse a LEB128 byte sequence and return value and how many bytes were
	## parsed
	var
	res: T

	read7(0)
	read7(7)
	read7(14)
	read7(21)
	read7(28)
	read7(35)
	read7(42)
	read7(49)
	read7(56)
	read7(63)

	err(incomplete)

	template toOpenArray*(v: LebBuf): openArray[byte] =
	toOpenArray(v.data, 0, v.len - 1)

	template len*(v: LebBuf): auto = v.len

	template `@`*(v: LebBuf): seq[byte] =
	@v.toOpenArray()

	func scanLeb128*(
	T: typedesc[SomeUnsignedInt],
	x: openArray[byte]): Result[int8, LebError] =
	## Scan a buffer for a valid leb128-encoded value that at most fits in a
	## uint64, and report how many bytes it uses
	# TODO this can be done efficiently with SSE
	T.fromBytesLeb128(x).map(proc(a: auto): auto = a.len)