Created
April 24, 2020 13:01
-
-
Save cr0hn/9b564d449efa674fe4155d71621068fa to your computer and use it in GitHub Desktop.
Analysis of performance and memory consumption of pickle vs json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Number of elements: 10 | |
----------------------- | |
Json Size: 0.02715015411376953 MB | |
Json time: 0.006659951999999997 sec | |
UJson time: 0.0030970319999999996 sec | |
Pickle Size Proto 5: 0.02531909942626953 MB | |
Pickle time Proto 5: 0.003745575000000001 sec | |
Pickle Size Proto 4: 0.02531909942626953 MB | |
Pickle time Proto 4: 0.0031195769999999984 sec | |
Number of elements: 500 | |
------------------------ | |
Json Size: 24.98400592803955 MB | |
Json time: 1.651241939 sec | |
UJson time: 0.6243056419999997 sec | |
Pickle Size Proto 5: 25.035669326782227 MB | |
Pickle time Proto 5: 0.38723978699999995 sec | |
Pickle Size Proto 4: 25.035669326782227 MB | |
Pickle time Proto 4: 0.38475265300000006 sec | |
Number of elements: 1000 | |
------------------------- | |
Json Size: 98.13797855377197 MB | |
Json time: 6.275222810999999 sec | |
UJson time: 2.5652533450000004 sec | |
Pickle Size Proto 5: 98.24791717529297 MB | |
Pickle time Proto 5: 1.3807646590000004 sec | |
Pickle Size Proto 4: 98.24791717529297 MB | |
Pickle time Proto 4: 1.3946891620000006 sec |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Analysis of performance and memory consumption of pickle vs json

Author: Daniel García (cr0hn)
Last update: 2020/04/24
Python version: 3.8
"""
import sys | |
import json | |
import ujson | |
import pickle | |
import timeit | |
from gc import get_referents | |
from dataclasses import dataclass | |
from collections import namedtuple | |
from types import ModuleType, FunctionType | |
#
# This function was taken from: https://stackoverflow.com/q/58675479/8153205
#
# Custom objects know their class.
# Function objects seem to know way too much, including modules.
# Exclude modules as well.
BLACKLIST = type, ModuleType, FunctionType


def getsize(obj):
    """Return the summed ``sys.getsizeof`` of *obj* and everything it references.

    The object graph is walked level by level with ``gc.get_referents``.
    Each distinct object (tracked by ``id``) is counted exactly once, so
    shared members and reference cycles are not double counted. Objects
    that are instances of a ``BLACKLIST`` type (classes, modules,
    functions) are neither counted nor traversed.

    Args:
        obj: The root object to measure.

    Returns:
        The total size in bytes, as reported by ``sys.getsizeof`` for every
        object that was visited.

    Raises:
        TypeError: If *obj* itself is a class, module or function.
    """
    if isinstance(obj, BLACKLIST):
        raise TypeError(
            'getsize() does not take argument of type: ' + str(type(obj)))

    seen_ids = set()
    total = 0
    frontier = [obj]
    while frontier:
        newly_counted = []
        for candidate in frontier:
            # Skip excluded types and anything already accounted for.
            if isinstance(candidate, BLACKLIST) or id(candidate) in seen_ids:
                continue
            seen_ids.add(id(candidate))
            total += sys.getsizeof(candidate)
            newly_counted.append(candidate)
        # Next level: everything directly referenced by the objects just
        # counted. With no arguments get_referents() returns an empty list,
        # which ends the loop.
        frontier = get_referents(*newly_counted)
    return total
# Benchmark driver: for each scale ``s`` build a dict of ``s`` string values
# plus 100 sub-dictionaries of the same shape, serialize it with json, ujson
# and pickle, and print the size of the serialized object and the time taken.
TIMED_RUNS = 10  # number of dumps() calls timed per serializer
BYTES_PER_MB = 1024 * 1024

for s in (10, 500, 1000):
    print("Number of elements: ", s)
    print("-" * (len(str(s)) + len("Number of elements: ")))
    print()

    # NOTE: the value string is intentionally rebuilt for every key rather
    # than hoisted into one variable. pickle memoizes objects by identity,
    # so sharing a single string object would shrink the pickle output and
    # change what is being measured.
    info = {
        f"key-{x}": "s" * s for x in range(s)
    }

    # add some sub dictionaries
    for ss in range(100):
        info[f"sub-{ss}"] = {
            f"sub-key-{x}": "s" * s for x in range(s)
        }

    # Serialize once to measure size, then time repeated serialization.
    json_payload = json.dumps(info)
    json_time = timeit.timeit(lambda: json.dumps(info), number=TIMED_RUNS)
    ujson_time = timeit.timeit(lambda: ujson.dumps(info), number=TIMED_RUNS)

    pickle_payload = pickle.dumps(info, protocol=pickle.HIGHEST_PROTOCOL)
    pickle_payload_proto_4 = pickle.dumps(info, protocol=4)
    pickle_time = timeit.timeit(
        lambda: pickle.dumps(info, protocol=pickle.HIGHEST_PROTOCOL),
        number=TIMED_RUNS,
    )
    pickle_time_proto_4 = timeit.timeit(
        lambda: pickle.dumps(info, protocol=4), number=TIMED_RUNS
    )

    # The label follows the protocol actually used instead of hard-coding 5
    # (HIGHEST_PROTOCOL is 5 on Python 3.8, so the output is unchanged there).
    highest = pickle.HIGHEST_PROTOCOL

    # sys.getsizeof reports the in-memory size of the str/bytes object,
    # which includes the object header, not just the payload length.
    print("Json Size: ", sys.getsizeof(json_payload) / BYTES_PER_MB, "MB")
    print("Json time: ", json_time, "sec")
    print("UJson time: ", ujson_time, "sec")
    print(f"Pickle Size Proto {highest}: ", sys.getsizeof(pickle_payload) / BYTES_PER_MB, "MB")
    print(f"Pickle time Proto {highest}: ", pickle_time, "sec")
    print("Pickle Size Proto 4: ", sys.getsizeof(pickle_payload_proto_4) / BYTES_PER_MB, "MB")
    print("Pickle time Proto 4: ", pickle_time_proto_4, "sec")
    print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment