Last active
October 30, 2021 16:31
-
-
Save Suor/d43c94e605fc682fab18cba3d89379bd to your computer and use it in GitHub Desktop.
Different ways to calculate struct size
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gc
import random
import sys
import types
from collections import deque
from datetime import datetime
def getsize(obj): | |
BLACKLIST = type, types.ModuleType, types.FunctionType | |
if isinstance(obj, BLACKLIST): | |
raise TypeError("getsize() does not take argument of type: " + str(type(obj))) | |
seen_ids, size, objects = set(), 0, [obj] | |
while objects: | |
need_referents = [] | |
for obj in objects: | |
if not isinstance(obj, BLACKLIST) and id(obj) not in seen_ids: | |
seen_ids.add(id(obj)) | |
size += sys.getsizeof(obj) | |
need_referents.append(obj) | |
objects = gc.get_referents(*need_referents) | |
return size | |
def json_size_d(obj):
    """Estimate ``len(json.dumps(obj))`` iteratively, without building the string.

    An explicit stack is used instead of recursion. Sizes assume the default
    ``json.dumps`` separators (``", "`` and ``": "``):

    * ``None``, bools and numbers count as ``len(str(value))``;
    * strings count as ``len(value) + 2`` (the quotes; escapes are not
      accounted for);
    * a non-empty list/tuple/set adds 2 characters per element;
    * a non-empty dict adds 6 characters per entry plus ``len(str(key))``;
    * an empty container counts as 2 (``[]`` or ``{}``).

    Circular references are not detected and would loop forever.

    Raises:
        TypeError: if an unsupported type is encountered.
    """
    size = 0
    # A plain list is enough: items are only pushed and popped at the right end.
    todo = [obj]
    while todo:
        item = todo.pop()
        if item is None or isinstance(item, (bool, int, float)):
            # str() and JSON agree on length: None/null, True/true, False/false.
            size += len(str(item))
        elif isinstance(item, str):
            size += len(item) + 2
        elif isinstance(item, (list, tuple, set, dict)):
            count = len(item)
            if count == 0:
                size += 2  # "[]" or "{}"
            elif isinstance(item, dict):
                # Per entry: two key quotes, ": " and ", " (the last entry's
                # ", " is replaced by the two braces).
                size += count * 6 + sum(len(str(k)) for k in item)
                todo.extend(item.values())
            else:
                # Per element: ", " (the last one's is replaced by the brackets).
                size += count * 2
                todo.extend(item)
        else:
            raise TypeError("Can't get json_size() of " + str(type(item)))
    return size
def json_size_dd(obj, cache=None):
    """A slower version of len(json.dumps(obj)), which doesn't require much extra memory.

    The structure is walked with an explicit stack. Every stack entry carries
    the set of container ids on the path from the root, which is how circular
    references are detected.

    Args:
        obj: value built from None, bools, numbers, datetimes, strings,
            lists/tuples/sets and dicts. Datetimes are counted as
            ``len(str(value))``.
        cache: optional mapping ``id(container) -> size``. When a container's
            id is present, the cached size is added and the container is not
            descended into.

    Returns:
        The estimated length, assuming the default ``json.dumps`` separators.
        Empty containers count as 2 (``[]`` or ``{}``).

    Raises:
        ValueError: if a container contains itself, directly or indirectly.
        TypeError: if an unsupported type is encountered.
    """
    literals = (bool, int, float, type(None), datetime)
    size = 0
    todo = [(set(), [obj])]
    while todo:
        path, objects = todo.pop()
        for item in objects:
            if isinstance(item, literals):
                size += len(str(item))
                continue
            if isinstance(item, str):
                size += len(item) + 2  # surrounding quotes
                continue

            item_id = id(item)
            if item_id in path:
                raise ValueError("Circular reference found")
            if cache and item_id in cache:
                size += cache[item_id]
            elif isinstance(item, (list, tuple, set, dict)):
                count = len(item)
                if count == 0:
                    size += 2  # "[]" or "{}"
                elif isinstance(item, dict):
                    size += count * 6 + sum(len(str(k)) for k in item)
                    # A new set per child keeps sibling paths independent.
                    todo.append((path | {item_id}, item.values()))
                else:
                    size += count * 2
                    todo.append((path | {item_id}, item))
            else:
                raise TypeError("Can't get json_size() of " + str(type(item)))
    return size
def json_size(obj):
    """Estimate ``len(json.dumps(obj))`` recursively, without building the string.

    Sizes assume the default ``json.dumps`` separators (``", "`` and ``": "``):

    * ``None``, bools and numbers count as ``len(str(value))``;
    * strings count as ``len(value) + 2`` (the quotes; escapes are not
      accounted for);
    * an empty container counts as 2 (``[]`` or ``{}``);
    * a list/tuple/set adds 2 characters per element;
    * a dict adds 6 characters per entry plus ``len(str(key))``.

    Raises:
        TypeError: if an unsupported type is encountered.
    """
    if obj is None or isinstance(obj, (bool, int, float)):
        # str() and JSON agree on length: None/null, True/true, False/false.
        return len(str(obj))
    if isinstance(obj, str):
        return len(obj) + 2
    if not isinstance(obj, (list, tuple, set, dict)):
        raise TypeError("Can't get json_size() of " + str(type(obj)))

    n = len(obj)
    if n == 0:
        return 2
    if isinstance(obj, dict):
        key_chars = sum(len(str(k)) for k in obj)
        value_chars = sum(map(json_size, obj.values()))
        return n * 6 + key_chars + value_chars
    return n * 2 + sum(map(json_size, obj))
def json_size_hint(obj, sample_size=10):
    """Estimate ``len(json.dumps(obj))`` by sampling large containers.

    Containers with at most *sample_size* elements are measured in full.
    Larger ones are measured on a random sample of *sample_size* elements and
    the result is scaled up to the full length, so the answer is approximate
    and can differ between calls.

    Args:
        obj: value built from None, bools, numbers, strings,
            lists/tuples/sets and dicts.
        sample_size: maximum number of elements inspected per container; it
            applies at every nesting level. Should be at least 1 (zero leads
            to a division by zero on any non-empty container).

    Returns:
        An int for scalars, strings and empty containers; otherwise a float,
        because of the scaling division.

    Raises:
        TypeError: if an unsupported type is encountered.
    """
    if obj is None or isinstance(obj, (bool, int, float)):
        return len(str(obj))
    if isinstance(obj, str):
        return len(obj) + 2
    if not isinstance(obj, (list, tuple, set, dict)):
        raise TypeError("Can't get json_size() of " + str(type(obj)))

    n = len(obj)
    if n == 0:
        return 2

    if isinstance(obj, dict):
        if n <= sample_size:
            sample = obj.items()
        else:
            # random.sample() needs a sequence; dict views are rejected on
            # Python 3.11+.
            sample = random.sample(list(obj.items()), sample_size)
        sampled = sum(len(str(k)) + json_size_hint(v, sample_size) for k, v in sample)
        return n * 6 + sampled * n / len(sample)

    if n <= sample_size:
        sample = obj
    else:
        # Sets are not sequences either, so materialise them before sampling.
        population = obj if isinstance(obj, (list, tuple)) else list(obj)
        sample = random.sample(population, sample_size)
    sampled = sum(json_size_hint(v, sample_size) for v in sample)
    return n * 2 + sampled * n / len(sample)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment