Skip to content

Instantly share code, notes, and snippets.

@Suor
Last active October 30, 2021 16:31
Show Gist options
  • Save Suor/d43c94e605fc682fab18cba3d89379bd to your computer and use it in GitHub Desktop.
Save Suor/d43c94e605fc682fab18cba3d89379bd to your computer and use it in GitHub Desktop.
Different ways to calculate struct size
import gc
import random
import sys
import types
from collections import deque
from datetime import datetime
def getsize(obj):
    """Return the summed ``sys.getsizeof`` of ``obj`` and all it refers to.

    The object graph is walked level by level with ``gc.get_referents``.
    Each distinct object (by ``id``) is counted once, so shared and cyclic
    references are not double-counted. Classes, modules and functions are
    neither counted nor traversed.

    Args:
        obj: The root object to measure.

    Returns:
        Total size in bytes as reported by ``sys.getsizeof``.

    Raises:
        TypeError: If ``obj`` itself is a class, module or function.
    """
    BLACKLIST = type, types.ModuleType, types.FunctionType
    if isinstance(obj, BLACKLIST):
        raise TypeError("getsize() does not take argument of type: " + str(type(obj)))

    seen_ids, size, objects = set(), 0, [obj]
    while objects:
        need_referents = []
        for item in objects:
            if not isinstance(item, BLACKLIST) and id(item) not in seen_ids:
                seen_ids.add(id(item))
                size += sys.getsizeof(item)
                need_referents.append(item)
        # One call fetches the next level for every newly counted object.
        objects = gc.get_referents(*need_referents)
    return size
def json_size_d(obj):
    """Estimate ``len(json.dumps(obj))`` iteratively, without serializing.

    Accounting, assuming the default ``", "`` and ``": "`` separators:

    * ``None``, booleans and numbers count ``len(str(value))``.
    * Strings count their length plus two quotes; escape sequences are not
      accounted for.
    * A non-empty list/tuple/set of ``n`` items adds ``2 * n`` (brackets
      plus separators); a non-empty dict adds ``6 * n`` plus the length of
      ``str(key)`` for every key.
    * An empty container counts as 2 (``[]`` / ``{}``).

    Args:
        obj: A structure made of dicts, lists, tuples, sets, strings,
            numbers, booleans and ``None``.

    Returns:
        The estimated serialized length as an ``int``.

    Raises:
        TypeError: If an unsupported type is encountered.
    """
    size, todo = 0, deque([obj])
    while todo:
        item = todo.pop()
        if item is None or isinstance(item, (bool, int, float)):
            size += len(str(item))
        elif isinstance(item, str):
            size += len(item) + 2
        elif isinstance(item, (list, tuple, set, dict)):
            if not item:
                # The per-item formulas below would yield 0 for "[]" / "{}".
                size += 2
            elif isinstance(item, dict):
                size += len(item) * 6 + sum(len(str(k)) for k in item)
                todo.extend(item.values())
            else:
                size += len(item) * 2
                todo.extend(item)
        else:
            raise TypeError("Can't get json_size() of " + str(type(item)))
    return size
def json_size_dd(obj, cache=None):
    """A slower version of len(json.dumps(obj)), which doesn't require much extra memory.

    Works iteratively with an explicit stack. Each stack entry carries the
    set of container ids on the path from the root, which is how circular
    references are detected; a container reachable through two different
    branches is not a cycle and is counted for each occurrence.

    Args:
        obj: A structure made of dicts, lists, tuples, sets, strings,
            numbers, booleans, ``None`` and ``datetime`` values.
        cache: Optional mapping of ``id(value)`` to a precomputed size. When
            a non-literal, non-string value's id is present, the cached size
            is added and the value is not traversed.

    Returns:
        The estimated serialized length.

    Raises:
        ValueError: If a container contains itself, directly or indirectly.
        TypeError: If an unsupported type is encountered.
    """
    # Literals are sized by the length of their str() form.
    literals = (bool, int, float, type(None), datetime)
    size, todo = 0, [(set(), [obj])]
    while todo:
        path, objects = todo.pop()
        for item in objects:
            if isinstance(item, literals):
                size += len(str(item))
            elif isinstance(item, str):
                size += len(item) + 2
            else:
                item_id = id(item)
                if item_id in path:
                    raise ValueError("Circular reference found")
                elif cache and item_id in cache:
                    size += cache[item_id]
                elif isinstance(item, (list, tuple, set, dict)):
                    if not item:
                        # "[]" / "{}": the per-item formulas would give 0.
                        size += 2
                    elif isinstance(item, dict):
                        size += len(item) * 6 + sum(len(str(k)) for k in item)
                        todo.append((path | {item_id}, item.values()))
                    else:
                        size += len(item) * 2
                        todo.append((path | {item_id}, item))
                else:
                    raise TypeError("Can't get json_size() of " + str(type(item)))
    return size
def json_size(obj):
    """Estimate ``len(json.dumps(obj))`` recursively, without serializing.

    ``None``, booleans and numbers count ``len(str(value))``; strings count
    their length plus two quotes (escapes are not accounted for). An empty
    container counts as 2. A non-empty list/tuple/set of ``n`` items adds
    ``2 * n`` plus the size of its items; a non-empty dict adds ``6 * n``
    plus ``len(str(key))`` per key plus the size of its values.

    Recurses once per nesting level of ``obj``.

    Args:
        obj: A structure made of dicts, lists, tuples, sets, strings,
            numbers, booleans and ``None``.

    Returns:
        The estimated serialized length as an ``int``.

    Raises:
        TypeError: If an unsupported type is encountered.
    """
    if obj is None or isinstance(obj, (bool, int, float)):
        return len(str(obj))
    if isinstance(obj, str):
        return len(obj) + 2
    if not isinstance(obj, (list, tuple, set, dict)):
        raise TypeError("Can't get json_size() of " + str(type(obj)))

    n = len(obj)
    if n == 0:
        return 2
    if isinstance(obj, dict):
        keys_size = sum(len(str(k)) for k in obj)
        return n * 6 + keys_size + sum(map(json_size, obj.values()))
    return n * 2 + sum(map(json_size, obj))
def json_size_hint(obj, sample_size=10):
    """Estimate ``len(json.dumps(obj))`` by sampling large containers.

    Scalars and strings are sized as ``len(str(value))`` and ``len + 2``.
    For a container with more than ``sample_size`` items, only a random
    sample of ``sample_size`` items is measured and the result is scaled up
    to the full length, so the answer is an approximation and may differ
    between calls. The same ``sample_size`` applies at every nesting level.

    Args:
        obj: A structure made of dicts, lists, tuples, sets, strings,
            numbers, booleans and ``None``.
        sample_size: Maximum number of items measured per container.

    Returns:
        An ``int`` for scalars, strings and empty containers; a ``float``
        for non-empty containers (the scaling uses true division).

    Raises:
        TypeError: If an unsupported type is encountered.
    """
    if obj is None or isinstance(obj, (bool, int, float)):
        return len(str(obj))
    if isinstance(obj, str):
        return len(obj) + 2
    if not isinstance(obj, (list, tuple, set, dict)):
        raise TypeError("Can't get json_size() of " + str(type(obj)))

    n = len(obj)
    if n == 0:
        return 2

    if isinstance(obj, dict):
        sample = obj.items()
        if n > sample_size:
            # random.sample() needs a sequence; a dict view is not one.
            sample = random.sample(list(sample), sample_size)
        measured = sum(
            len(str(k)) + json_size_hint(v, sample_size) for k, v in sample
        )
        return n * 6 + measured * n / len(sample)

    sample = obj
    if n > sample_size:
        # Sets are not sequences either; lists and tuples are used as-is.
        population = list(obj) if isinstance(obj, set) else obj
        sample = random.sample(population, sample_size)
    measured = sum(json_size_hint(item, sample_size) for item in sample)
    return n * 2 + measured * n / len(sample)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment