Created
April 27, 2019 01:04
-
-
Save ppwwyyxx/9d9ace8abc243f0e3b56c179ba381db1 to your computer and use it in GitHub Desktop.
Serialization Benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Dependencies:
    pip install \
        tabulate ujson msgpack msgpack_numpy numpy pyarrow
""" | |
import sys | |
from timeit import timeit | |
import pickle | |
from tabulate import tabulate | |
def get_tests(is_numpy):
    """Build the list of serializer benchmarks.

    Every entry is a ``(title, setup, enc_test, dec_test)`` tuple of strings:
    ``setup`` imports the library and pre-encodes the payload ``d`` into
    ``src``, ``enc_test`` is the encode statement to time and ``dec_test``
    is the decode statement to time. All statements expect a variable named
    ``d`` to have been defined before ``setup`` runs.

    Args:
        is_numpy: When true, the ``json`` and ``ujson`` entries are omitted
            (presumably because they cannot encode numpy arrays -- confirm).

    Returns:
        A list of 4-tuples of str, in the order msgpack, pyarrow, one entry
        per pickle protocol, then (optionally) json and ujson.
    """
    tests = [
        # (title, setup, enc_test, dec_test)
        ('msgpack-python', 'import msgpack; import msgpack_numpy as m; m.patch(); src = msgpack.dumps(d)', 'msgpack.dumps(d)', 'msgpack.loads(src)'),
        # NOTE(review): pa.serialize / pa.deserialize look to be deprecated in
        # newer pyarrow releases -- confirm against the installed version.
        ('pyarrow', 'import pyarrow as pa; src = pa.serialize(d).to_buffer()', 'pa.serialize(d).to_buffer()', 'pa.deserialize(src)'),
    ]

    # The interpreter version cannot change between iterations, so choose the
    # pickle setup template once instead of re-checking it per protocol.
    if sys.version_info.major == 3:
        setup_template = 'import pickle ; src = pickle.dumps(d, {})'
    else:
        setup_template = 'import cPickle as pickle; src = pickle.dumps(d, {})'

    # Benchmark protocol 2 and, where the interpreter supports it, protocol 3.
    for k in range(2, min(pickle.HIGHEST_PROTOCOL, 3) + 1):
        tests.append((
            'pickle-protocol{}'.format(k),
            setup_template.format(k),
            'pickle.dumps(d, {})'.format(k),
            'pickle.loads(src)',
        ))

    if not is_numpy:
        tests.extend([
            ('json', 'import json; src = json.dumps(d)', 'json.dumps(d)', 'json.loads(src)'),
            ('ujson', 'import ujson; src = ujson.dumps(d)', 'ujson.dumps(d)', 'ujson.loads(src)'),
        ])
    return tests
def run_tests(tests, data, loops):
    """Time each benchmark's encode and decode statement and print the results.

    Args:
        tests: Iterable of ``(title, setup, enc_test, dec_test)`` string
            tuples.
        data: Python source that defines the payload; it is executed ahead
            of each test's ``setup`` statement.
        loops: Number of executions per timing (passed to ``timeit`` as
            ``number``).

    Returns:
        None. Two tables (encoding, decoding) are printed, each sorted by
        ascending elapsed seconds.
    """
    enc_table = []
    dec_table = []

    print("Running tests (%d loops each)" % loops)
    for title, setup, enc, dec in tests:
        # Join with a newline rather than ' ; ' so the combined setup stays
        # valid even when `data` is empty or ends with a comment or a
        # compound statement.
        full_setup = data + '\n' + setup
        print("Running " + title)
        # Each timeit call executes the setup itself, so the payload is
        # rebuilt once for the encode timing and once for the decode timing.
        enc_table.append([title, timeit(enc, full_setup, number=loops)])
        dec_table.append([title, timeit(dec, full_setup, number=loops)])

    header = ['Method', 'Seconds']
    # Fastest method first; the header row is prepended only after sorting.
    enc_table.sort(key=lambda row: row[1])
    dec_table.sort(key=lambda row: row[1])

    print("\nEncoding Test (%d loops)" % loops)
    print(tabulate([header] + enc_table, headers="firstrow"))

    print("\nDecoding Test (%d loops)" % loops)
    print(tabulate([header] + dec_table, headers="firstrow"))
# Benchmark 1: a payload made only of built-in Python types.
# The string below is Python source, not data: run_tests() hands it to timeit
# as part of the setup code, where it defines the payload variable `d`.
print("Benchmarking plain data ...............")
plain_data = '''d = {
'words': """
Lorem ipsum dolor sit amet, consectetur adipiscing
elit. Mauris adipiscing adipiscing placerat.
Vestibulum augue augue,
pellentesque quis sollicitudin id, adipiscing.
""" * 100,
'list': list(range(100)) * 300,
'dict': dict((str(i),'a') for i in range(5000)),
'int': 3000,
'float': 100.123456
}'''
run_tests(get_tests(False), plain_data, 1000)

# Benchmark 2: a payload of numpy arrays. get_tests(True) leaves out the
# json/ujson entries, and fewer loops are used than for the plain payload.
print("Benchmarking numpy data ...............")
numpy_data = """
import numpy as np
d = {
"arrays": [np.random.rand(1000, 353) for k in range(10)]
} """
run_tests(get_tests(True), numpy_data, 300)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Outputs with Python 3.7: