Skip to content

Instantly share code, notes, and snippets.

@qpwo
Last active November 30, 2024 18:59
Show Gist options
  • Save qpwo/a1b485b66540e4d81288a0853f3d8343 to your computer and use it in GitHub Desktop.
Save qpwo/a1b485b66540e4d81288a0853f3d8343 to your computer and use it in GitHub Desktop.
python pickle/pytorch append!
import os, pickle, time, numpy as np
def naive_append(filename, obj):
    """Append ``obj`` to the pickled list stored in ``filename``.

    The file holds a single pickled list. Every call unpickles that whole
    list, adds one element and pickles the whole list back, so the cost of a
    call grows with the amount of data already stored.

    Args:
        filename: Path of the pickle file. If it does not exist yet, it is
            created containing ``[obj]``.
        obj: Picklable object to add to the end of the stored list.
    """
    # EAFP: open directly instead of checking os.path.exists() first, which
    # leaves a window between the check and the open.
    try:
        with open(filename, 'rb') as f:
            objs = pickle.load(f)
    except FileNotFoundError:
        objs = []
    objs.append(obj)
    with open(filename, 'wb') as f:
        pickle.dump(objs, f)
def naive_load(filename):
    """Read ``filename`` and return the one object pickled in it.

    Only the first pickle in the file is read; for a file written by
    ``naive_append`` that is the complete list of appended objects.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    with open(filename, 'rb') as fh:
        stored = pickle.load(fh)
    return stored
def smart_append(filename, obj):
    """Append ``obj`` to ``filename`` as one more independent pickle.

    Nothing already in the file is read or rewritten: the new pickle is
    written after the existing content, so the cost of a call depends only on
    ``obj``. The file is created if it does not exist.

    Args:
        filename: Path of the file to append to.
        obj: Picklable object to write.
    """
    # Plain append mode is enough; the file is never read here, so the
    # read access that 'ab+' would request is not needed.
    with open(filename, 'ab') as f:
        pickle.dump(obj, f)
def smart_load(filename):
    """Yield, in file order, every object pickled back-to-back in ``filename``.

    This is a generator: objects are unpickled one at a time as the caller
    iterates, and the file stays open until the generator is exhausted or
    closed.

    Args:
        filename: Path of a file holding zero or more consecutive pickles,
            such as one written by ``smart_append``.

    Yields:
        Each unpickled object, in the order it was written.
    """
    with open(filename, 'rb') as f:
        while True:
            # Only the load itself is guarded: pickle.load raises EOFError
            # when no data is left. Keeping the yield outside the try means
            # an EOFError raised by the consumer is not mistaken for the end
            # of the file.
            try:
                obj = pickle.load(f)
            except EOFError:
                return
            yield obj
def bench(appender, loader, filename="test.log.pkl", n_arrays=100, shape=(500, 500)):
    """Time an append/load pair and check that the data round-trips.

    Generates ``n_arrays`` random arrays, appends them one by one with
    ``appender``, reads everything back with ``loader`` and prints the
    elapsed time of each phase. The file is left in place afterwards.

    Args:
        appender: Callable ``appender(filename, obj)`` that stores one object.
        loader: Callable ``loader(filename)`` returning an iterable of the
            stored objects in insertion order.
        filename: Scratch file to use; deleted first if it already exists.
        n_arrays: Number of arrays to append.
        shape: Shape of each generated array.

    Raises:
        AssertionError: If the loaded data does not match what was appended.
    """
    # Start from a clean file so data from an earlier run cannot leak in.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass
    arrays = [np.random.randn(*shape) for _ in range(n_arrays)]

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments.
    started = time.perf_counter()
    for arr in arrays:
        appender(filename, arr)
    print(f"{appender.__name__}:\t{time.perf_counter() - started:.3f} seconds")

    started = time.perf_counter()
    got_back = list(loader(filename))
    print(f"{loader.__name__}:\t{time.perf_counter() - started:.3f} seconds")

    assert len(arrays) == len(got_back)
    # Element-wise comparison; comparing sums alone would also accept
    # arrays whose values had merely been rearranged.
    assert all(np.array_equal(a, b) for a, b in zip(arrays, got_back))
# Benchmark the rewrite-the-whole-list pair first, then the append-only pair.
bench(appender=naive_append, loader=naive_load)
bench(appender=smart_append, loader=smart_load)
# Sample output from the two bench() calls above (timings vary by machine):
# naive_append: 8.387 seconds
# naive_load: 0.024 seconds
# smart_append: 0.042 seconds
# smart_load: 0.026 seconds
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment