Last active
July 22, 2024 18:29
-
-
Save generalmimon/41dcb2bba48cb47c2f30ce9f086cbbf2 to your computer and use it in GitHub Desktop.
read_bytes_term() benchmark for the Kaitai Struct Python runtime library: https://github.com/kaitai-io/kaitai_struct_python_runtime/blob/92a2d715/kaitaistruct.py#L410
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Usage: | |
$ richbench --times TIMES --markdown . | |
You need to install https://pypi.org/project/richbench/ first: | |
$ python -m pip install -U richbench | |
""" | |
import random | |
import sys | |
from io import BytesIO, SEEK_CUR | |
import json | |
# Where the terminator byte is planted in the generated workload.
TERM_HALFWAY = 1
TERM_END = 2
TERM_MISSING = 3

# Maps the "term_pos" string from bench_config.json to the constants above.
JSON_TERM_POS_TABLE = {
    'halfway': TERM_HALFWAY,
    'end': TERM_END,
    'missing': TERM_MISSING,
}

# The benchmark is parameterized through a JSON file in the current working
# directory with the keys "term_pos", "include_term" and "workload_size".
with open('bench_config.json', 'r', encoding='utf-8') as f:
    conf = json.load(f)

# An unknown "term_pos" value surfaces as a KeyError from this lookup.
TERM_POS = JSON_TERM_POS_TABLE[conf['term_pos']]

# Validate with explicit raises instead of `assert`: assertions are removed
# when Python runs with -O, which would let a malformed config through.
if not isinstance(conf['include_term'], bool):
    raise TypeError(
        "bench_config.json: 'include_term' must be a boolean, got {!r}".format(
            conf['include_term']
        )
    )
INCLUDE_TERM = conf['include_term']

# `bool` is a subclass of `int`, so a JSON true/false would pass a plain
# isinstance(..., int) check; reject it explicitly.
if isinstance(conf['workload_size'], bool) or not isinstance(conf['workload_size'], int):
    raise TypeError(
        "bench_config.json: 'workload_size' must be an integer, got {!r}".format(
            conf['workload_size']
        )
    )
WORKLOAD_SIZE = conf['workload_size']

del conf

# True when running under Python 2; read by KaitaiStream.byte_from_int().
PY2 = sys.version_info[0] == 2
class KaitaiStream(object):
    """Cut-down stream wrapper holding the read_bytes_term() variants under test.

    Only what the benchmark needs is kept: a wrapped binary file-like object,
    an absolute seek, and three implementations of the same
    terminator-delimited read. The per-byte read loop and the way each variant
    accumulates and compares bytes are what is being measured, so the variants
    intentionally differ from one another in exactly those places.
    """

    def __init__(self, io):
        # `io` must support read(size) and seek(offset[, whence]).
        self._io = io

    def seek(self, n):
        """Reposition the wrapped stream to absolute offset `n`."""
        self._io.seek(n)

    def read_bytes_term_old(self, term, include_term, consume_term, eos_error):
        """Read up to the terminator, accumulating into an immutable `bytes`.

        Args:
            term: terminator byte value as an integer.
            include_term: append the terminator to the returned data.
            consume_term: leave the stream positioned after the terminator;
                when false, step back one byte so it is read again next time.
            eos_error: raise if the stream ends before a terminator is seen;
                when false, return everything read so far instead.

        Returns:
            The bytes read, as `bytes`.

        Raises:
            Exception: the stream ended without a terminator and `eos_error`
                is true.
        """
        data = b''
        while True:
            ch = self._io.read(1)
            if ch == b'':
                if not eos_error:
                    return data
                raise Exception(
                    "end of stream reached, but no terminator %d found" % (term,)
                )
            if ord(ch) != term:
                data += ch
                continue
            # Terminator found.
            if include_term:
                data += ch
            if not consume_term:
                self._io.seek(-1, SEEK_CUR)
            return data

    def read_bytes_term_new(self, term, include_term, consume_term, eos_error):
        """Same contract as read_bytes_term_old(), but accumulates into a
        mutable `bytearray` and converts to `bytes` only on return.
        """
        data = bytearray()
        while True:
            ch = self._io.read(1)
            if ch == b'':
                if not eos_error:
                    return bytes(data)
                raise Exception(
                    "end of stream reached, but no terminator %d found" % (term,)
                )
            if ord(ch) != term:
                data += ch
                continue
            # Terminator found.
            if include_term:
                data += ch
            if not consume_term:
                self._io.seek(-1, SEEK_CUR)
            return bytes(data)

    def read_bytes_term_new_v2(self, term, include_term, consume_term, eos_error):
        """Same contract as read_bytes_term_new(), but converts `term` to a
        one-byte string once up front and compares the read byte against it
        directly instead of calling ord() on every byte.
        """
        term_byte = KaitaiStream.byte_from_int(term)
        data = bytearray()
        while True:
            ch = self._io.read(1)
            if not ch:
                if not eos_error:
                    return bytes(data)
                raise Exception(
                    "end of stream reached, but no terminator %d found" % (term,)
                )
            if ch != term_byte:
                data += ch
                continue
            # Terminator found.
            if include_term:
                data += ch
            if not consume_term:
                self._io.seek(-1, SEEK_CUR)
            return bytes(data)

    @staticmethod
    def byte_from_int(i):
        """Return the one-byte string whose value is the integer `i`.

        Uses chr() when the module-level PY2 flag is set, bytes((i,)) otherwise.
        """
        if PY2:
            return chr(i)
        return bytes((i,))
# Random terminator byte, plus a substitute value guaranteed to differ from it.
term = random.getrandbits(8)
repl = 0xff if term == 0x00 else 0x00

# Random payload in which every accidental occurrence of the terminator is
# overwritten, so the only terminator present is the one planted below.
buf = bytearray(
    random.randbytes(WORKLOAD_SIZE).replace(bytes((term,)), bytes((repl,)))
)

# Plant the terminator according to the configuration and record `exp`, the
# length every read_bytes_term variant is expected to return.
term_len = 1 if INCLUDE_TERM else 0
if TERM_POS == TERM_HALFWAY:
    term_at = len(buf) // 2
    buf[term_at] = term
    exp = term_at + term_len
elif TERM_POS == TERM_END:
    # "end" places the terminator in the second-to-last byte.
    buf[-2] = term
    exp = len(buf) - 2 + term_len
elif TERM_POS == TERM_MISSING:
    # No terminator at all: the whole buffer is expected back.
    exp = len(buf)

buf = bytes(buf)
# One stream over the generated workload, shared by all benchmark cases;
# each case rewinds it before reading.
ks_io = KaitaiStream(BytesIO(buf))


def old():
    """Benchmark case: rewind and run read_bytes_term_old() on the workload."""
    ks_io.seek(0)
    return ks_io.read_bytes_term_old(
        term, include_term=INCLUDE_TERM, consume_term=True, eos_error=False
    )


def new():
    """Benchmark case: rewind and run read_bytes_term_new() on the workload."""
    ks_io.seek(0)
    return ks_io.read_bytes_term_new(
        term, include_term=INCLUDE_TERM, consume_term=True, eos_error=False
    )


def new_v2():
    """Benchmark case: rewind and run read_bytes_term_new_v2() on the workload."""
    ks_io.seek(0)
    return ks_io.read_bytes_term_new_v2(
        term, include_term=INCLUDE_TERM, consume_term=True, eos_error=False
    )
# Before anything is timed, run every variant once and confirm that it returns
# a `bytes` object of the expected length.
_MISMATCH = '{}: expected {} but got {}'
for fn in (old, new, new_v2):
    label = '{}()'.format(fn.__name__)
    result = fn()
    assert len(result) == exp, _MISMATCH.format(label, exp, len(result))
    assert isinstance(result, bytes), _MISMATCH.format(label, bytes, type(result))
# Pairs of cases to compare, each with a "<first> vs <second>" label built
# from the function names. NOTE(review): presumably this is the list richbench
# looks up when it loads the module; confirm against the richbench docs.
__benchmarks__ = [
    (first, second, '{} vs {}'.format(first.__name__, second.__name__))
    for first, second in ((old, new), (new, new_v2), (old, new_v2))
]
''' | |
$ python --version | |
Python 3.12.4 | |
$ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 2048}' > bench_config.json && richbench --times 1000 --markdown . | |
Benchmarks, repeat=5, number=1000 | |
| Benchmark | Min | Max | Mean | Min (+) | Max (+) | Mean (+) | | |
|---------------|---------|---------|---------|-----------------|-----------------|-----------------| | |
| old vs new | 0.132 | 0.151 | 0.139 | 0.090 (1.5x) | 0.125 (1.2x) | 0.106 (1.3x) | | |
| new vs new_v2 | 0.094 | 0.115 | 0.101 | 0.085 (1.1x) | 0.088 (1.3x) | 0.086 (1.2x) | | |
| old vs new_v2 | 0.134 | 0.193 | 0.148 | 0.082 (1.6x) | 0.090 (2.1x) | 0.084 (1.8x) | | |
$ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 8192}' > bench_config.json && richbench --times 500 --markdown . | |
Benchmarks, repeat=5, number=500 | |
| Benchmark | Min | Max | Mean | Min (+) | Max (+) | Mean (+) | | |
|---------------|---------|---------|---------|-----------------|-----------------|-----------------| | |
| old vs new | 0.388 | 0.428 | 0.400 | 0.181 (2.1x) | 0.185 (2.3x) | 0.183 (2.2x) | | |
| new vs new_v2 | 0.181 | 0.184 | 0.182 | 0.154 (1.2x) | 0.162 (1.1x) | 0.159 (1.1x) | | |
| old vs new_v2 | 0.388 | 0.393 | 0.391 | 0.162 (2.4x) | 0.165 (2.4x) | 0.164 (2.4x) | | |
$ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 32768}' > bench_config.json && richbench --times 200 --markdown . | |
Benchmarks, repeat=5, number=200 | |
| Benchmark | Min | Max | Mean | Min (+) | Max (+) | Mean (+) | | |
|---------------|---------|---------|---------|-----------------|-----------------|-----------------| | |
| old vs new | 0.987 | 1.009 | 0.993 | 0.290 (3.4x) | 0.337 (3.0x) | 0.301 (3.3x) | | |
| new vs new_v2 | 0.286 | 0.289 | 0.288 | 0.257 (1.1x) | 0.266 (1.1x) | 0.261 (1.1x) | | |
| old vs new_v2 | 0.984 | 1.035 | 0.997 | 0.256 (3.8x) | 0.268 (3.9x) | 0.261 (3.8x) | | |
''' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment