Last active
December 16, 2020 21:24
-
-
Save cculianu/8e97702e7a4e21d425415b3098799e2a to your computer and use it in GitHub Desktop.
Benchmark recursive hashing vs current E-X approach
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import hashlib | |
import random | |
import time | |
from typing import List | |
def make_random_history(count: int) -> List[bytes]: | |
ret = [] | |
for _ in range(count): | |
rand_hash_hex = bytes(random.getrandbits(8) for _ in range(32)).hex() | |
rand_height = random.randint(0, 600_000) | |
ret.append(f"{rand_hash_hex}:{rand_height}".encode('ascii')) | |
ret = sorted(ret, key=lambda x: int(x.split(b':')[-1])) | |
return ret | |
def sha256d(b: bytes) -> bytes: | |
return hashlib.sha256(hashlib.sha256(b).digest()).digest() | |
def recursive_hash(items: List[bytes], chunk_size: int = 1) -> bytes: | |
status = b'00' * 32 | |
for i in range(0, len(items), chunk_size): | |
status = sha256d(status.hex().encode('ascii') + b':' + b':'.join(items[i:i+chunk_size])) | |
return status | |
def main(): | |
n = 1_000_000 | |
print(f"Generating {n} random sorted history items ...") | |
t0 = time.time() | |
items = make_random_history(n) | |
print(f"Elapsed: {(time.time() - t0) * 1e3:1.3f} msec") | |
print('-' * 80) | |
# Bench hashing 1 million items the current E-X way | |
print(f"Hashing all {n} items once (current E-X approach) ...") | |
t0 = time.time() | |
bstr = b':'.join(items); | |
t1 = time.time() | |
fake_status = sha256d(bstr).hex() | |
print(f"Result: {fake_status}") | |
print(f"Elapsed: {(time.time()-t0)*1e3:1.3f} msec (of which {(t1-t0)*1e3:1.3f} msec was spent concatenating the " | |
f"byte string)") | |
print('-' * 80) | |
# Bench hashing 1 million items the proposed 'recursive' way, using various "chunk" sizes | |
for chunk_size in (1, 10, 100, 1_000, 10_000): | |
print(f"Hashing all {n} items recursively, using \"chunk size\" of {chunk_size} ...") | |
t0 = time.time() | |
fake_status = recursive_hash(items, chunk_size).hex() | |
print(f"Result: {fake_status}") | |
print(f"Elapsed: {(time.time()-t0)*1e3:1.3f} msec") | |
print('-' * 80) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment