Created
August 26, 2012 13:15
-
-
Save ViktorStiskala/3479130 to your computer and use it in GitHub Desktop.
Amazon Glacier tree hash calculation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import hashlib | |
def bytes_to_hex(str):
    """
    Return the lowercase hexadecimal representation of a byte-string.

    Every input byte becomes exactly two hex digits, so the result is
    twice as long as the input; an empty input yields an empty string.

    The parameter keeps its original name (even though it shadows the
    builtin) so that existing keyword callers keep working.
    """
    # Iterating a bytearray yields integers on both Python 2 and Python 3.
    # Iterating the raw value yields 1-char strings on Python 2 but ints on
    # Python 3, where calling ord() on them raises TypeError.
    return ''.join("%02x" % byte for byte in bytearray(str))
def chunk_hashes(str, chunk_size=1024 * 1024):
    """
    Break up the byte-string into fixed-size chunks and return the raw
    sha256 digest of each chunk, in order.

    str        -- the byte-string to hash (name kept for compatibility).
    chunk_size -- size of each chunk in bytes; defaults to 1MB. The last
                  chunk may be shorter.

    An empty byte-string yields a one-element list holding the sha256
    digest of the empty string, so the result is never empty and can be
    reduced to a single root hash without special-casing.

    Raises ValueError if chunk_size is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")

    # Step through the data by offset instead of computing a chunk count
    # with float division; slicing past the end simply truncates.
    digests = [
        hashlib.sha256(str[start:start + chunk_size]).digest()
        for start in range(0, len(str), chunk_size)
    ]

    if not digests:
        # No data at all: hash the empty payload so callers still get a leaf.
        return [hashlib.sha256(b"").digest()]
    return digests
def tree_hash(hashes):
    """
    Given a hash of each chunk (from chunk_hashes) this will hash together
    adjacent pairs of hashes, level by level, until a single root hash
    remains. So a tree of hashes.

    hashes -- iterable of raw sha256 digests (byte-strings), in chunk order.

    Returns the raw root digest. A single input hash is returned unchanged.
    Raises IndexError if no hashes are supplied.

    When a level holds an odd number of hashes, the last one has no partner
    and is carried up to the next level unchanged. It must not be hashed
    again on its own, otherwise the root differs from the Glacier tree hash
    whenever a level has an odd count.
    """
    level = list(hashes)
    while len(level) > 1:
        # Hash each complete adjacent pair; stop before an unpaired tail.
        next_level = [
            hashlib.sha256(level[i] + level[i + 1]).digest()
            for i in range(0, len(level) - 1, 2)
        ]
        if len(level) % 2:
            # Odd one out: promote it as-is.
            next_level.append(level[-1])
        level = next_level
    return level[0]
# Script body: read 'file_list.txt' as raw bytes, hash it in chunks, and
# print the resulting tree hash as hex.
# open() replaces the Python 2-only file() builtin; binary mode keeps the
# bytes exactly as stored on disk.
with open('file_list.txt', 'rb') as f:
    content = f.read()
hashes = chunk_hashes(content)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(bytes_to_hex(tree_hash(hashes)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A little credit would have been nice! You copied this from here: https://gist.github.com/3417915
Also, it has a bug in it.