generalmimon · July 22, 2024 18:29
diff --git a/bench_read_bytes_term.py b/bench_read_bytes_term.py
 """
 Usage:

    $ richbench --times TIMES --markdown .

 You need to install https://pypi.org/project/richbench/ first:

    $ python -m pip install -U richbench
 """

 import random
 import sys
 from io import BytesIO, SEEK_CUR
 import json

 # Where the terminator byte is placed in the generated workload.
 TERM_HALFWAY = 1
 TERM_END = 2
 TERM_MISSING = 3

 # Translates the "term_pos" string found in bench_config.json to a constant.
 JSON_TERM_POS_TABLE = {
    'halfway': TERM_HALFWAY,
    'end': TERM_END,
    'missing': TERM_MISSING,
 }

 with open('bench_config.json', 'r', encoding='utf-8') as f:
    conf = json.load(f)

 # The checks below raise explicitly instead of using `assert`: assertions are
 # compiled out under `python -O`, so a malformed config would otherwise slip
 # through to the benchmark unnoticed.
 if conf['term_pos'] not in JSON_TERM_POS_TABLE:
    raise ValueError(
        "bench_config.json: 'term_pos' must be one of {} but got {!r}".format(
            sorted(JSON_TERM_POS_TABLE), conf['term_pos']
        )
    )
 TERM_POS = JSON_TERM_POS_TABLE[conf['term_pos']]

 if not isinstance(conf['include_term'], bool):
    raise TypeError(
        "bench_config.json: 'include_term' must be a boolean but got {!r}".format(
            conf['include_term']
        )
    )
 INCLUDE_TERM = conf['include_term']

 if not isinstance(conf['workload_size'], int):
    raise TypeError(
        "bench_config.json: 'workload_size' must be an integer but got {!r}".format(
            conf['workload_size']
        )
    )
 WORKLOAD_SIZE = conf['workload_size']
 del conf

 # True when running under Python 2; consulted by KaitaiStream.byte_from_int().
 PY2 = sys.version_info[0] == 2

 class KaitaiStream(object):
    """Thin wrapper around a seekable binary stream.

    Hosts the three `read_bytes_term_*` variants that this file benchmarks
    against each other. The variants intentionally differ only in how they
    accumulate bytes and how they compare against the terminator, so those
    operations must stay as they are for the timings to remain meaningful.
    """

    def __init__(self, io):
        # `io` is used through read(n) and seek(offset[, whence]) only.
        self._io = io

    def seek(self, n):
        """Reposition the wrapped stream to absolute offset `n`."""
        self._io.seek(n)

    def read_bytes_term_old(self, term, include_term, consume_term, eos_error):
        """Read up to terminator `term`, accumulating into an immutable `bytes`.

        term -- integer value of the terminator byte.
        include_term -- append the terminator to the returned data.
        consume_term -- leave the stream positioned after the terminator;
            when false the stream is stepped back onto the terminator.
        eos_error -- raise if the stream ends before a terminator is seen,
            otherwise return everything that was read.
        """
        acc = b''
        while True:
            chunk = self._io.read(1)
            if chunk == b'':
                break

            if ord(chunk) == term:
                if include_term:
                    acc += chunk
                if not consume_term:
                    self._io.seek(-1, SEEK_CUR)
                return acc

            acc += chunk

        # The stream ran out before the terminator showed up.
        if eos_error:
            raise Exception(
                "end of stream reached, but no terminator %d found" % (term,)
            )
        return acc

    def read_bytes_term_new(self, term, include_term, consume_term, eos_error):
        """Same contract as `read_bytes_term_old`, but accumulates into a
        `bytearray` and converts to `bytes` only when returning.
        """
        acc = bytearray()
        while True:
            chunk = self._io.read(1)
            if chunk == b'':
                break

            if ord(chunk) == term:
                if include_term:
                    acc += chunk
                if not consume_term:
                    self._io.seek(-1, SEEK_CUR)
                return bytes(acc)

            acc += chunk

        # The stream ran out before the terminator showed up.
        if eos_error:
            raise Exception(
                "end of stream reached, but no terminator %d found" % (term,)
            )
        return bytes(acc)

    def read_bytes_term_new_v2(self, term, include_term, consume_term, eos_error):
        """Same contract as `read_bytes_term_new`, but converts `term` to a
        one-byte string once up front and compares each byte read against it
        directly instead of calling `ord()` per byte.
        """
        term_byte = KaitaiStream.byte_from_int(term)
        acc = bytearray()
        while True:
            chunk = self._io.read(1)
            if not chunk:
                break

            if chunk == term_byte:
                if include_term:
                    acc += chunk
                if not consume_term:
                    self._io.seek(-1, SEEK_CUR)
                return bytes(acc)

            acc += chunk

        # The stream ran out before the terminator showed up.
        if eos_error:
            raise Exception(
                "end of stream reached, but no terminator %d found" % (term,)
            )
        return bytes(acc)

    @staticmethod
    def byte_from_int(i):
        """Return the one-byte string holding integer value `i`.

        Uses `chr` when the module-level `PY2` flag is set and `bytes((i,))`
        otherwise.
        """
        if PY2:
            return chr(i)
        return bytes((i,))


 # Random terminator value, plus a substitute byte guaranteed to differ from it.
 term = random.getrandbits(8)
 repl = 0xff if term == 0x00 else 0x00

 # Random payload in which every accidental occurrence of the terminator is
 # swapped for `repl`, so the only terminator present is the one planted below.
 buf = bytearray(
    random.randbytes(WORKLOAD_SIZE).replace(bytes((term,)), bytes((repl,)))
 )

 # Plant the terminator (if any) and record in `exp` how many bytes each reader
 # is expected to return.
 if TERM_POS == TERM_HALFWAY:
    buf[len(buf) // 2] = term
    exp = len(buf) // 2 + (1 if INCLUDE_TERM else 0)
 elif TERM_POS == TERM_END:
    # "End" means the second-to-last byte, so one byte still follows it.
    buf[-2] = term
    exp = len(buf) - 2 + (1 if INCLUDE_TERM else 0)
 elif TERM_POS == TERM_MISSING:
    # No terminator at all: readers run to end of stream.
    exp = len(buf)
 buf = bytes(buf)

 # Single shared stream; each benchmark entry point rewinds it before reading.
 ks_io = KaitaiStream(BytesIO(buf))


 def old():
    """Rewind the shared stream and read with `read_bytes_term_old`."""
    stream = ks_io
    stream.seek(0)
    return stream.read_bytes_term_old(term, INCLUDE_TERM, True, False)

 def new():
    """Rewind the shared stream and read with `read_bytes_term_new`."""
    stream = ks_io
    stream.seek(0)
    return stream.read_bytes_term_new(term, INCLUDE_TERM, True, False)

 def new_v2():
    """Rewind the shared stream and read with `read_bytes_term_new_v2`."""
    stream = ks_io
    stream.seek(0)
    return stream.read_bytes_term_new_v2(term, INCLUDE_TERM, True, False)


 # Run every candidate once up front and check that it returns `bytes` of the
 # expected length, so a broken implementation is never benchmarked.
 for candidate in (old, new, new_v2):
    label = '{}()'.format(candidate.__name__)
    produced = candidate()
    produced_len = len(produced)
    assert produced_len == exp, \
        '{}: expected {} but got {}'.format(label, exp, produced_len)
    assert isinstance(produced, bytes), \
        '{}: expected {} but got {}'.format(label, bytes, type(produced))


 # Comparison pairs picked up by richbench; each entry is
 # (first function, second function, label shown in the results table).
 __benchmarks__ = [
    (old, new, "old vs new"),
    (new, new_v2, "new vs new_v2"),
    (old, new_v2, "old vs new_v2"),
 ]

 '''
 $ python --version
 Python 3.12.4

 $ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 2048}' > bench_config.json && richbench --times 1000 --markdown .
                                  Benchmarks, repeat=5, number=1000

 |     Benchmark | Min     | Max     | Mean    | Min (+)         | Max (+)         | Mean (+)        |
 |---------------|---------|---------|---------|-----------------|-----------------|-----------------|
 |    old vs new | 0.132   | 0.151   | 0.139   | 0.090 (1.5x)    | 0.125 (1.2x)    | 0.106 (1.3x)    |
 | new vs new_v2 | 0.094   | 0.115   | 0.101   | 0.085 (1.1x)    | 0.088 (1.3x)    | 0.086 (1.2x)    |
 | old vs new_v2 | 0.134   | 0.193   | 0.148   | 0.082 (1.6x)    | 0.090 (2.1x)    | 0.084 (1.8x)    |

 $ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 8192}' > bench_config.json && richbench --times 500 --markdown .
                                  Benchmarks, repeat=5, number=500

 |     Benchmark | Min     | Max     | Mean    | Min (+)         | Max (+)         | Mean (+)        |
 |---------------|---------|---------|---------|-----------------|-----------------|-----------------|
 |    old vs new | 0.388   | 0.428   | 0.400   | 0.181 (2.1x)    | 0.185 (2.3x)    | 0.183 (2.2x)    |
 | new vs new_v2 | 0.181   | 0.184   | 0.182   | 0.154 (1.2x)    | 0.162 (1.1x)    | 0.159 (1.1x)    |
 | old vs new_v2 | 0.388   | 0.393   | 0.391   | 0.162 (2.4x)    | 0.165 (2.4x)    | 0.164 (2.4x)    |

 $ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 32768}' > bench_config.json && richbench --times 200 --markdown .
                                  Benchmarks, repeat=5, number=200

 |     Benchmark | Min     | Max     | Mean    | Min (+)         | Max (+)         | Mean (+)        |
 |---------------|---------|---------|---------|-----------------|-----------------|-----------------|
 |    old vs new | 0.987   | 1.009   | 0.993   | 0.290 (3.4x)    | 0.337 (3.0x)    | 0.301 (3.3x)    |
 | new vs new_v2 | 0.286   | 0.289   | 0.288   | 0.257 (1.1x)    | 0.266 (1.1x)    | 0.261 (1.1x)    |
 | old vs new_v2 | 0.984   | 1.035   | 0.997   | 0.256 (3.8x)    | 0.268 (3.9x)    | 0.261 (3.8x)    |
 '''
	"""
	Usage:

	$ richbench --times TIMES --markdown .

	You need to install https://pypi.org/project/richbench/ first:

	$ python -m pip install -U richbench
	"""

	import random
	import sys
	from io import BytesIO, SEEK_CUR
	import json

	# Where the terminator byte is placed in the generated workload.
	TERM_HALFWAY = 1
	TERM_END = 2
	TERM_MISSING = 3

	# Translates the "term_pos" string found in bench_config.json to a constant.
	JSON_TERM_POS_TABLE = {
	    'halfway': TERM_HALFWAY,
	    'end': TERM_END,
	    'missing': TERM_MISSING,
	}

	with open('bench_config.json', 'r', encoding='utf-8') as f:
	    conf = json.load(f)

	# The checks below raise explicitly instead of using `assert`: assertions are
	# compiled out under `python -O`, so a malformed config would otherwise slip
	# through to the benchmark unnoticed.
	if conf['term_pos'] not in JSON_TERM_POS_TABLE:
	    raise ValueError(
	        "bench_config.json: 'term_pos' must be one of {} but got {!r}".format(
	            sorted(JSON_TERM_POS_TABLE), conf['term_pos']
	        )
	    )
	TERM_POS = JSON_TERM_POS_TABLE[conf['term_pos']]

	if not isinstance(conf['include_term'], bool):
	    raise TypeError(
	        "bench_config.json: 'include_term' must be a boolean but got {!r}".format(
	            conf['include_term']
	        )
	    )
	INCLUDE_TERM = conf['include_term']

	if not isinstance(conf['workload_size'], int):
	    raise TypeError(
	        "bench_config.json: 'workload_size' must be an integer but got {!r}".format(
	            conf['workload_size']
	        )
	    )
	WORKLOAD_SIZE = conf['workload_size']
	del conf

	# True when running under Python 2; consulted by KaitaiStream.byte_from_int().
	PY2 = sys.version_info[0] == 2

	class KaitaiStream(object):
	    """Thin wrapper around a seekable binary stream.

	    Hosts the three `read_bytes_term_*` variants that this file benchmarks
	    against each other. The variants intentionally differ only in how they
	    accumulate bytes and how they compare against the terminator, so those
	    operations must stay as they are for the timings to remain meaningful.
	    """

	    def __init__(self, io):
	        # `io` is used through read(n) and seek(offset[, whence]) only.
	        self._io = io

	    def seek(self, n):
	        """Reposition the wrapped stream to absolute offset `n`."""
	        self._io.seek(n)

	    def read_bytes_term_old(self, term, include_term, consume_term, eos_error):
	        """Read up to terminator `term`, accumulating into an immutable `bytes`.

	        term -- integer value of the terminator byte.
	        include_term -- append the terminator to the returned data.
	        consume_term -- leave the stream positioned after the terminator;
	            when false the stream is stepped back onto the terminator.
	        eos_error -- raise if the stream ends before a terminator is seen,
	            otherwise return everything that was read.
	        """
	        acc = b''
	        while True:
	            chunk = self._io.read(1)
	            if chunk == b'':
	                break

	            if ord(chunk) == term:
	                if include_term:
	                    acc += chunk
	                if not consume_term:
	                    self._io.seek(-1, SEEK_CUR)
	                return acc

	            acc += chunk

	        # The stream ran out before the terminator showed up.
	        if eos_error:
	            raise Exception(
	                "end of stream reached, but no terminator %d found" % (term,)
	            )
	        return acc

	    def read_bytes_term_new(self, term, include_term, consume_term, eos_error):
	        """Same contract as `read_bytes_term_old`, but accumulates into a
	        `bytearray` and converts to `bytes` only when returning.
	        """
	        acc = bytearray()
	        while True:
	            chunk = self._io.read(1)
	            if chunk == b'':
	                break

	            if ord(chunk) == term:
	                if include_term:
	                    acc += chunk
	                if not consume_term:
	                    self._io.seek(-1, SEEK_CUR)
	                return bytes(acc)

	            acc += chunk

	        # The stream ran out before the terminator showed up.
	        if eos_error:
	            raise Exception(
	                "end of stream reached, but no terminator %d found" % (term,)
	            )
	        return bytes(acc)

	    def read_bytes_term_new_v2(self, term, include_term, consume_term, eos_error):
	        """Same contract as `read_bytes_term_new`, but converts `term` to a
	        one-byte string once up front and compares each byte read against it
	        directly instead of calling `ord()` per byte.
	        """
	        term_byte = KaitaiStream.byte_from_int(term)
	        acc = bytearray()
	        while True:
	            chunk = self._io.read(1)
	            if not chunk:
	                break

	            if chunk == term_byte:
	                if include_term:
	                    acc += chunk
	                if not consume_term:
	                    self._io.seek(-1, SEEK_CUR)
	                return bytes(acc)

	            acc += chunk

	        # The stream ran out before the terminator showed up.
	        if eos_error:
	            raise Exception(
	                "end of stream reached, but no terminator %d found" % (term,)
	            )
	        return bytes(acc)

	    @staticmethod
	    def byte_from_int(i):
	        """Return the one-byte string holding integer value `i`.

	        Uses `chr` when the module-level `PY2` flag is set and `bytes((i,))`
	        otherwise.
	        """
	        if PY2:
	            return chr(i)
	        return bytes((i,))


	# Random terminator value, plus a substitute byte guaranteed to differ from it.
	term = random.getrandbits(8)
	repl = 0xff if term == 0x00 else 0x00

	# Random payload in which every accidental occurrence of the terminator is
	# swapped for `repl`, so the only terminator present is the one planted below.
	buf = bytearray(
	    random.randbytes(WORKLOAD_SIZE).replace(bytes((term,)), bytes((repl,)))
	)

	# Plant the terminator (if any) and record in `exp` how many bytes each reader
	# is expected to return.
	if TERM_POS == TERM_HALFWAY:
	    buf[len(buf) // 2] = term
	    exp = len(buf) // 2 + (1 if INCLUDE_TERM else 0)
	elif TERM_POS == TERM_END:
	    # "End" means the second-to-last byte, so one byte still follows it.
	    buf[-2] = term
	    exp = len(buf) - 2 + (1 if INCLUDE_TERM else 0)
	elif TERM_POS == TERM_MISSING:
	    # No terminator at all: readers run to end of stream.
	    exp = len(buf)
	buf = bytes(buf)

	# Single shared stream; each benchmark entry point rewinds it before reading.
	ks_io = KaitaiStream(BytesIO(buf))


	def old():
	    """Rewind the shared stream and read with `read_bytes_term_old`."""
	    stream = ks_io
	    stream.seek(0)
	    return stream.read_bytes_term_old(term, INCLUDE_TERM, True, False)

	def new():
	    """Rewind the shared stream and read with `read_bytes_term_new`."""
	    stream = ks_io
	    stream.seek(0)
	    return stream.read_bytes_term_new(term, INCLUDE_TERM, True, False)

	def new_v2():
	    """Rewind the shared stream and read with `read_bytes_term_new_v2`."""
	    stream = ks_io
	    stream.seek(0)
	    return stream.read_bytes_term_new_v2(term, INCLUDE_TERM, True, False)


	# Run every candidate once up front and check that it returns `bytes` of the
	# expected length, so a broken implementation is never benchmarked.
	for candidate in (old, new, new_v2):
	    label = '{}()'.format(candidate.__name__)
	    produced = candidate()
	    produced_len = len(produced)
	    assert produced_len == exp, \
	        '{}: expected {} but got {}'.format(label, exp, produced_len)
	    assert isinstance(produced, bytes), \
	        '{}: expected {} but got {}'.format(label, bytes, type(produced))


	# Comparison pairs picked up by richbench; each entry is
	# (first function, second function, label shown in the results table).
	__benchmarks__ = [
	    (old, new, "old vs new"),
	    (new, new_v2, "new vs new_v2"),
	    (old, new_v2, "old vs new_v2"),
	]

	'''
	$ python --version
	Python 3.12.4

	$ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 2048}' > bench_config.json && richbench --times 1000 --markdown .
	Benchmarks, repeat=5, number=1000

	|     Benchmark | Min     | Max     | Mean    | Min (+)         | Max (+)         | Mean (+)        |
	|---------------|---------|---------|---------|-----------------|-----------------|-----------------|
	|    old vs new | 0.132   | 0.151   | 0.139   | 0.090 (1.5x)    | 0.125 (1.2x)    | 0.106 (1.3x)    |
	| new vs new_v2 | 0.094   | 0.115   | 0.101   | 0.085 (1.1x)    | 0.088 (1.3x)    | 0.086 (1.2x)    |
	| old vs new_v2 | 0.134   | 0.193   | 0.148   | 0.082 (1.6x)    | 0.090 (2.1x)    | 0.084 (1.8x)    |

	$ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 8192}' > bench_config.json && richbench --times 500 --markdown .
	Benchmarks, repeat=5, number=500

	|     Benchmark | Min     | Max     | Mean    | Min (+)         | Max (+)         | Mean (+)        |
	|---------------|---------|---------|---------|-----------------|-----------------|-----------------|
	|    old vs new | 0.388   | 0.428   | 0.400   | 0.181 (2.1x)    | 0.185 (2.3x)    | 0.183 (2.2x)    |
	| new vs new_v2 | 0.181   | 0.184   | 0.182   | 0.154 (1.2x)    | 0.162 (1.1x)    | 0.159 (1.1x)    |
	| old vs new_v2 | 0.388   | 0.393   | 0.391   | 0.162 (2.4x)    | 0.165 (2.4x)    | 0.164 (2.4x)    |

	$ echo '{"term_pos": "halfway", "include_term": false, "workload_size": 32768}' > bench_config.json && richbench --times 200 --markdown .
	Benchmarks, repeat=5, number=200

	|     Benchmark | Min     | Max     | Mean    | Min (+)         | Max (+)         | Mean (+)        |
	|---------------|---------|---------|---------|-----------------|-----------------|-----------------|
	|    old vs new | 0.987   | 1.009   | 0.993   | 0.290 (3.4x)    | 0.337 (3.0x)    | 0.301 (3.3x)    |
	| new vs new_v2 | 0.286   | 0.289   | 0.288   | 0.257 (1.1x)    | 0.266 (1.1x)    | 0.261 (1.1x)    |
	| old vs new_v2 | 0.984   | 1.035   | 0.997   | 0.256 (3.8x)    | 0.268 (3.9x)    | 0.261 (3.8x)    |
	'''