Last active
June 3, 2019 22:26
-
-
Save jaytaylor/76de9c99acbfac637e68f78809dbd27e to your computer and use it in GitHub Desktop.
MD5 hash calculator for multi-part file uploads to S3 / object storage.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python | |
| # -*- coding: utf-8 -*- | |
| """ | |
| @author Jay E. Taylor <[email protected]> | |
| @date 2019-06-03 | |
| @description MD5 hash calculator for multi-part file uploads to S3 / object storage. | |
| Also see the Go version: https://gist.github.com/jaytaylor/57799723734dd90e3a8510e0de1ba38f | |
| Based on: https://gist.github.com/itemir/f5bc9fded6483cd79c89ebf4ca1cfd30 | |
| """ | |
| import argparse | |
| import hashlib | |
| import logging | |
| import sys | |
def parse_flags(args):
    """Parse command-line arguments and configure the root logger.

    Args:
        args: List of argument strings without the program name
            (for example ``sys.argv[1:]``).

    Returns:
        The ``argparse.Namespace`` with ``filename``, ``part_size`` (always a
        positive int), ``base64`` and ``verbose`` attributes.

    Raises:
        SystemExit: Raised by argparse (status 2) when the arguments are
            invalid, including a zero or negative ``--part-size``.
    """
    def positive_int(text):
        # A part size of 0 would make the reader stop immediately and a
        # negative one would read the whole file as a single part, so both
        # are rejected up front instead of producing a misleading hash.
        value = int(text)
        if value <= 0:
            raise argparse.ArgumentTypeError('must be a positive integer, got %s' % text)
        return value

    parser = argparse.ArgumentParser()
    parser.add_argument('filename', help='File to calculate chunked MD5 sum for')
    parser.add_argument('-s', '--part-size', type=positive_int, default=1073741824, help='Segment size for individual parts, in bytes (default=1073741824, i.e. 1GB)')
    parser.add_argument('-b', '--base64', action='store_true', help='Display in base64 instead of hexadecimal')
    parser.add_argument('-v', '--verbose', action='store_true', help='Activate verbose log output')
    flags = parser.parse_args(args)

    # Verbose mode lowers the threshold to DEBUG so per-part details are shown.
    logging.basicConfig(level=logging.DEBUG if flags.verbose else logging.INFO)
    return flags
def md5_chunks(f, part_bytes):
    """Return the raw MD5 digest of each consecutive part of a binary stream.

    The stream is consumed from its current position to EOF and split into
    parts of ``part_bytes`` bytes; only the last part may be shorter.

    Args:
        f: Object with a ``read(n)`` method returning bytes (``b''`` at EOF).
        part_bytes: Size of each part in bytes; must be positive.

    Returns:
        List of 16-byte MD5 digests, one per part, in order. Empty when the
        stream has no data.

    Raises:
        ValueError: If ``part_bytes`` is not positive.
    """
    if part_bytes <= 0:
        raise ValueError('part_bytes must be a positive integer, got %r' % (part_bytes,))

    # Feed each part to the hash in bounded blocks so a large part size does
    # not require holding the whole part in memory at once.
    block_size = 1 << 20
    digests = []
    while True:
        part = hashlib.md5()
        remaining = part_bytes
        while remaining > 0:
            block = f.read(min(remaining, block_size))
            if not block:
                break
            part.update(block)
            # read() may return fewer bytes than requested; keep filling the
            # current part rather than treating a short read as a boundary.
            remaining -= len(block)

        part_len = part_bytes - remaining
        if part_len == 0:
            # EOF fell exactly on a part boundary (or the stream was empty).
            break
        logging.debug('read chunk of length %s', part_len)
        digests.append(part.digest())
        if remaining > 0:
            # The part came up short, so the stream is exhausted.
            break
    return digests
def main(args):
    """Print the multi-part MD5 of a file as ``<digest>-<part count>``.

    The digest is the MD5 of the concatenated raw per-part MD5 digests,
    rendered in hexadecimal, or in base64 when ``--base64`` is given.

    Args:
        args: Command-line arguments without the program name.

    Returns:
        0 on success, 1 if the file could not be opened or read.
    """
    # Imported here so this function stays self-contained; these replace the
    # Python 2-only 'hex' / 'base64' string codecs and work on Python 2 and 3.
    import base64
    import binascii

    flags = parse_flags(args)
    try:
        with open(flags.filename, 'rb') as fh:
            hashes = md5_chunks(fh, flags.part_size)
    except IOError:
        logging.error('Cannot open file "%s"', flags.filename)
        return 1

    if flags.verbose:
        for i, h in enumerate(hashes):
            logging.debug('hash=%s part_seq_num=%s', binascii.hexlify(h).decode('ascii'), i + 1)

    joined = b''.join(hashes)
    logging.debug('joined hash = %s', binascii.hexlify(joined).decode('ascii'))

    combined = hashlib.md5(joined)
    multipart_hash = combined.hexdigest()
    logging.debug("final hex digest = %s", multipart_hash)

    if flags.base64:
        # Base64 is taken over the raw 16 digest bytes, not over the hex text.
        shown = base64.b64encode(combined.digest()).decode('ascii')
    else:
        shown = multipart_hash
    print('%s-%d' % (shown, len(hashes)))
    return 0
# Script entry point: run main() on the command-line arguments and use its
# return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main(sys.argv[1:]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment