Last active
April 4, 2019 20:33
-
-
Save nat-n/a8e3da75019b029058db3b643e4f1694 to your computer and use it in GitHub Desktop.
Functions for encoding and decoding integers into byte strings with Variable-length quantity encoding. https://en.wikipedia.org/wiki/Variable-length_quantity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List, Tuple | |
def encode(number: int) -> bytes: | |
assert number >= 0, "Can only encode positive integers" | |
flag_mask = 0b10000000 | |
value_mask = 0b01111111 | |
bytes_buffer = [number & value_mask] | |
number = number >> 7 | |
while number: | |
bytes_buffer.append(flag_mask | (number & value_mask)) | |
number = number >> 7 | |
return bytes(reversed(bytes_buffer)) | |
def decode_first(data: bytes) -> int: | |
""" | |
Decodes the first integer in the input and ignore the rest. | |
""" | |
if not data: | |
return 0 | |
value_mask = 0b01111111 | |
result = data[0] & value_mask | |
if data[0] >= 128: | |
for byte in data[1:]: | |
result = (result << 7) + (byte & value_mask) | |
if byte < 128: | |
break | |
return result | |
def decode(data: bytes) -> Tuple[int, bytes]: | |
""" | |
Decodes the first integer in the input and return the rest along side the | |
decoded value. | |
""" | |
if not data: | |
return 0 | |
value_mask = 0b01111111 | |
result = data[0] & value_mask | |
decoded_length = 1 | |
if data[0] >= 128: | |
for byte in data[1:]: | |
result = (result << 7) + (byte & value_mask) | |
decoded_length += 1 | |
if byte < 128: | |
break | |
return result, data[decoded_length:] | |
def decode_all(data: bytes) -> List[int]: | |
result = [] | |
remainder = data | |
while remainder: | |
value, remainder = decode(remainder) | |
result.append(value) | |
return result | |
def test_encode_and_decode_symmetry(size): | |
for i in range(0, size, 7): | |
assert decode_first(encode(i)) == i, f"Couldn't encode and decode {i}" | |
for i in range(0, size, 7): | |
assert decode(encode(i))[0] == i, f"Couldn't encode and decode {i}" | |
def test_encode_examples(): | |
assert list(encode(0)) == [0] | |
assert list(encode(1)) == [1] | |
assert list(encode(127)) == [127] | |
assert list(encode(128)) == [129, 0] | |
assert list(encode(255)) == [129, 127] | |
assert list(encode(256)) == [130, 0] | |
assert list(encode(257)) == [130, 1] | |
assert list(encode(10000)) == [206, 16] | |
assert list(encode(16383)) == [255, 127] | |
assert list(encode(16384)) == [129, 128, 0] | |
assert list(encode(1234567890)) == [132, 204, 216, 133, 82] | |
def test_decode_examples(): | |
assert decode([255, 0]) == (16256, []) | |
assert decode([1, 255, 0]) == (1, [255, 0]) | |
assert decode([255, 0, 255, 0]) == (16256, [255, 0]) | |
assert decode([132, 204, 216, 133, 82]) == (1234567890, []) | |
def test_decode_all_examples(): | |
assert decode_all([255, 0]) == [16256] | |
assert decode_all([132, 204, 216, 133, 82]) == [1234567890] | |
assert decode_all([255, 0, 255, 0]) == [16256, 16256] | |
assert decode_all([0, 1, 233, 233, 100, 100]) == [0, 1, 1733860, 100] | |
if __name__ == "__main__": | |
print("Testing encode/decode symetry") | |
test_encode_and_decode_symmetry(1_000_000) | |
print("Testing encode for specific cases") | |
test_encode_examples() | |
print("Testing decode for specific cases") | |
test_decode_examples() | |
print("Testing decode_all for specific cases") | |
test_decode_all_examples() | |
print("OK") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment