Skip to content

Instantly share code, notes, and snippets.

@generalmimon
Last active July 22, 2024 18:29
Show Gist options
  • Save generalmimon/41dcb2bba48cb47c2f30ce9f086cbbf2 to your computer and use it in GitHub Desktop.
Save generalmimon/41dcb2bba48cb47c2f30ce9f086cbbf2 to your computer and use it in GitHub Desktop.
read_bytes_term() benchmark for the Kaitai Struct Python runtime library: https://github.com/kaitai-io/kaitai_struct_python_runtime/blob/92a2d715/kaitaistruct.py#L410
"""
Usage:
$ richbench --times TIMES --markdown .
You need to install https://pypi.org/project/richbench/ first:
$ python -m pip install -U richbench
"""
import random
import sys
from io import BytesIO, SEEK_CUR
import json
# Where the terminator byte is placed inside the generated workload.
TERM_HALFWAY = 1
TERM_END = 2
TERM_MISSING = 3

# Maps the "term_pos" string from bench_config.json to the constants above.
JSON_TERM_POS_TABLE = {
    'halfway': TERM_HALFWAY,
    'end': TERM_END,
    'missing': TERM_MISSING,
}

# The benchmark is parameterized through a JSON file in the current working
# directory, with the keys "term_pos", "include_term" and "workload_size".
with open('bench_config.json', 'r', encoding='utf-8') as f:
    conf = json.load(f)

# An unknown "term_pos" value surfaces as a KeyError from the table lookup.
TERM_POS = JSON_TERM_POS_TABLE[conf['term_pos']]

# Validate with explicit exceptions rather than `assert`, so the checks are
# still performed when Python runs with -O.
if not isinstance(conf['include_term'], bool):
    raise TypeError(
        '"include_term" must be a JSON boolean, got {!r}'.format(conf['include_term'])
    )
INCLUDE_TERM = conf['include_term']

# `bool` is a subclass of `int`, so it has to be rejected explicitly.
if isinstance(conf['workload_size'], bool) or not isinstance(conf['workload_size'], int):
    raise TypeError(
        '"workload_size" must be a JSON integer, got {!r}'.format(conf['workload_size'])
    )
WORKLOAD_SIZE = conf['workload_size']

del conf

PY2 = sys.version_info[0] == 2
class KaitaiStream(object):
    """Cut-down stream wrapper holding the `read_bytes_term()` variants to compare.

    Wraps a seekable binary file-like object (anything providing `read(n)` and
    `seek(...)`). The three `read_bytes_term_*` methods share one contract and
    differ only in how they accumulate and compare bytes:

    * read one byte at a time until a byte equal to `term` is found or the
      stream ends;
    * `include_term`: append the terminator byte to the returned data;
    * `consume_term`: leave the stream positioned after the terminator
      (otherwise seek one byte back so it can be read again);
    * `eos_error`: raise `Exception` if the stream ends before a terminator
      is seen (otherwise return whatever was read).

    All variants return `bytes`.
    """

    def __init__(self, io):
        self._io = io

    def seek(self, n):
        """Move the underlying stream to absolute position `n`."""
        self._io.seek(n)

    def read_bytes_term_old(self, term, include_term, consume_term, eos_error):
        """Baseline variant: accumulate the result in an immutable `bytes` object."""
        r = b''
        while True:
            c = self._io.read(1)
            if c == b'':
                if eos_error:
                    raise Exception(
                        "end of stream reached, but no terminator %d found" %
                        (term,)
                    )

                return r

            if ord(c) == term:
                if include_term:
                    r += c
                if not consume_term:
                    # Un-read the terminator so the next read sees it again.
                    self._io.seek(-1, SEEK_CUR)
                return r

            # Deliberately kept as-is: this `bytes +=` is what is being measured.
            r += c

    def read_bytes_term_new(self, term, include_term, consume_term, eos_error):
        """Variant accumulating into a `bytearray`, converted to `bytes` on return."""
        r = bytearray()
        while True:
            c = self._io.read(1)
            if c == b'':
                if eos_error:
                    raise Exception(
                        "end of stream reached, but no terminator %d found" %
                        (term,)
                    )

                return bytes(r)

            if ord(c) == term:
                if include_term:
                    r += c
                if not consume_term:
                    # Un-read the terminator so the next read sees it again.
                    self._io.seek(-1, SEEK_CUR)
                return bytes(r)

            r += c

    def read_bytes_term_new_v2(self, term, include_term, consume_term, eos_error):
        """Like `read_bytes_term_new()`, but without a per-byte `ord()` call.

        The integer terminator is converted to a one-byte string once, up
        front, and each byte read is compared against it directly; end of
        stream is detected with a truthiness test instead of `== b''`.
        """
        term_byte = KaitaiStream.byte_from_int(term)
        r = bytearray()
        while True:
            c = self._io.read(1)
            if not c:
                if eos_error:
                    raise Exception(
                        "end of stream reached, but no terminator %d found" %
                        (term,)
                    )

                return bytes(r)

            if c == term_byte:
                if include_term:
                    r += c
                if not consume_term:
                    # Un-read the terminator so the next read sees it again.
                    self._io.seek(-1, SEEK_CUR)
                return bytes(r)

            r += c

    @staticmethod
    def byte_from_int(i):
        """Return the one-byte string whose single byte has the integer value `i`."""
        return chr(i) if PY2 else bytes((i,))
# Random terminator byte, plus a different byte used to overwrite any accidental
# occurrence of the terminator in the random payload.
term = random.getrandbits(8)
repl = 0xff if term == 0x00 else 0x00

# Random payload that is guaranteed not to contain the terminator anywhere.
buf = bytearray(random.randbytes(WORKLOAD_SIZE))
buf = buf.replace(bytes((term,)), bytes((repl,)))

# Decide once where the terminator goes, so that the buffer contents and the
# expected result length are derived from the same index.
if TERM_POS == TERM_HALFWAY:
    term_idx = len(buf) // 2
elif TERM_POS == TERM_END:
    # "End" means the second-to-last byte: one byte follows the terminator.
    term_idx = len(buf) - 2
else:  # TERM_MISSING
    term_idx = None

if term_idx is None:
    # No terminator: the readers return the entire buffer.
    exp = len(buf)
else:
    if not 0 <= term_idx < len(buf):
        raise ValueError(
            'workload_size {} is too small for the configured terminator position'
            .format(WORKLOAD_SIZE)
        )
    buf[term_idx] = term
    # Expected length of the returned data: everything before the terminator,
    # plus the terminator itself when it is included.
    exp = term_idx + (1 if INCLUDE_TERM else 0)

buf = bytes(buf)

# Single shared stream; every benchmarked function rewinds it before reading.
ks_io = KaitaiStream(BytesIO(buf))
def old():
    """Benchmark target: rewind the shared stream and run `read_bytes_term_old()`.

    Called with `consume_term=True` and `eos_error=False`; returns the bytes read.
    """
    ks_io.seek(0)
    return ks_io.read_bytes_term_old(term, INCLUDE_TERM, True, False)
def new():
    """Benchmark target: rewind the shared stream and run `read_bytes_term_new()`.

    Called with `consume_term=True` and `eos_error=False`; returns the bytes read.
    """
    ks_io.seek(0)
    return ks_io.read_bytes_term_new(term, INCLUDE_TERM, True, False)
def new_v2():
    """Benchmark target: rewind the shared stream and run `read_bytes_term_new_v2()`.

    Called with `consume_term=True` and `eos_error=False`; returns the bytes read.
    """
    ks_io.seek(0)
    return ks_io.read_bytes_term_new_v2(term, INCLUDE_TERM, True, False)
# Sanity-check every variant once before any timing happens, so that a broken
# implementation is not benchmarked as if it were valid.
for fn in (old, new, new_v2):
    fn_name = fn.__name__ + '()'
    act_obj = fn()
    act = len(act_obj)
    assert act == exp, '{}: expected {} but got {}'.format(fn_name, exp, act)
    assert isinstance(act_obj, bytes), '{}: expected {} but got {}'.format(fn_name, bytes, type(act_obj))
    # Length alone would not catch a variant that returns the wrong bytes;
    # the result must be exactly the leading `exp` bytes of the workload.
    assert act_obj == buf[:exp], (
        '{}: returned data differs from the first {} bytes of the workload'
        .format(fn_name, exp)
    )
# Comparison table read by richbench: each entry is
# (baseline function, candidate function, label shown in the report).
__benchmarks__ = [
    (old, new, "old vs new"),
    (new, new_v2, "new vs new_v2"),
    (old, new_v2, "old vs new_v2"),
]
'''
$ python --version
Python 3.12.4
$ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 2048}' > bench_config.json && richbench --times 1000 --markdown .
Benchmarks, repeat=5, number=1000
| Benchmark | Min | Max | Mean | Min (+) | Max (+) | Mean (+) |
|---------------|---------|---------|---------|-----------------|-----------------|-----------------|
| old vs new | 0.132 | 0.151 | 0.139 | 0.090 (1.5x) | 0.125 (1.2x) | 0.106 (1.3x) |
| new vs new_v2 | 0.094 | 0.115 | 0.101 | 0.085 (1.1x) | 0.088 (1.3x) | 0.086 (1.2x) |
| old vs new_v2 | 0.134 | 0.193 | 0.148 | 0.082 (1.6x) | 0.090 (2.1x) | 0.084 (1.8x) |
$ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 8192}' > bench_config.json && richbench --times 500 --markdown .
Benchmarks, repeat=5, number=500
| Benchmark | Min | Max | Mean | Min (+) | Max (+) | Mean (+) |
|---------------|---------|---------|---------|-----------------|-----------------|-----------------|
| old vs new | 0.388 | 0.428 | 0.400 | 0.181 (2.1x) | 0.185 (2.3x) | 0.183 (2.2x) |
| new vs new_v2 | 0.181 | 0.184 | 0.182 | 0.154 (1.2x) | 0.162 (1.1x) | 0.159 (1.1x) |
| old vs new_v2 | 0.388 | 0.393 | 0.391 | 0.162 (2.4x) | 0.165 (2.4x) | 0.164 (2.4x) |
$ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 32768}' > bench_config.json && richbench --times 200 --markdown .
Benchmarks, repeat=5, number=200
| Benchmark | Min | Max | Mean | Min (+) | Max (+) | Mean (+) |
|---------------|---------|---------|---------|-----------------|-----------------|-----------------|
| old vs new | 0.987 | 1.009 | 0.993 | 0.290 (3.4x) | 0.337 (3.0x) | 0.301 (3.3x) |
| new vs new_v2 | 0.286 | 0.289 | 0.288 | 0.257 (1.1x) | 0.266 (1.1x) | 0.261 (1.1x) |
| old vs new_v2 | 0.984 | 1.035 | 0.997 | 0.256 (3.8x) | 0.268 (3.9x) | 0.261 (3.8x) |
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment