Skip to content

Instantly share code, notes, and snippets.

@vinovator
Last active January 27, 2016 15:13
Show Gist options
  • Save vinovator/de3b2c93d8ce80a4b71d to your computer and use it in GitHub Desktop.
Save vinovator/de3b2c93d8ce80a4b71d to your computer and use it in GitHub Desktop.
Script to generate MD5 checksum for a given file. Uses iterator implementation.
# checksumGenerator.py
# Python 2.7.6
"""
Script to generate MD5 checksum for a given file
Also see comparison of performance with iterator implementation
"""
import hashlib
import time
# Path of the file whose checksum is computed when this module is run as a
# script; being a relative path, open() resolves it against the current
# working directory.
src_file = "md5/sample.xml"
def generate_md5(fname, chunk_size=4096):
    """
    Return the MD5 checksum of a file as a hexadecimal string.

    The file is opened in binary mode and fed to the hash in pieces, each
    obtained with a single ``read(chunk_size)`` call.

    Args:
        fname: Path of the file to hash.
        chunk_size: Number of bytes requested per read (default 4096).

    Returns:
        The hex digest of the file's contents.

    Any error raised by ``open()`` or ``read()`` (for example a missing
    file) propagates to the caller unchanged.
    """
    # Named "md5" rather than "hash" so the builtin hash() is not shadowed.
    md5 = hashlib.md5()
    with open(fname, "rb") as f:
        # Two-argument iter() calls f.read() repeatedly and stops as soon as
        # it returns the empty byte string that signals end of file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()
def read_file(fname, chunk_size=4096):
    """
    Generator that yields the contents of a file block by block.

    The file is opened in binary mode and stays open only while the
    generator is being consumed.

    Args:
        fname: Path of the file to read.
        chunk_size: Number of bytes requested per read (default 4096).

    Yields:
        Successive non-empty byte strings in file order; the last one may be
        shorter than ``chunk_size``. Nothing is yielded for an empty file.
    """
    with open(fname, "rb") as f:
        while True:
            chunk = f.read(chunk_size)
            # An empty read means end of file: stop without yielding it.
            if not chunk:
                break
            yield chunk
def generate_md5_with_iterator(fname, chunk_size=4096):
    """
    Return the MD5 checksum of a file, reading it through ``read_file``.

    Args:
        fname: Path of the file to hash.
        chunk_size: Number of bytes requested per read, forwarded to
            ``read_file`` (default 4096, the same default ``read_file``
            and ``generate_md5`` use).

    Returns:
        The hex digest of the file's contents.
    """
    # Named "md5" rather than "hash" so the builtin hash() is not shadowed.
    md5 = hashlib.md5()
    # The loop ends when the read_file generator is exhausted (end of file).
    for chunk in read_file(fname, chunk_size):
        md5.update(chunk)
    return md5.hexdigest()
if __name__ == "__main__":
    # time.clock() was deprecated in Python 3.3 and removed in 3.8, so use
    # time.perf_counter() when it exists and only fall back to time.clock()
    # on interpreters (such as Python 2.7) that do not provide it. The
    # "or" keeps time.clock from being looked up unless it is needed.
    timer = getattr(time, "perf_counter", None) or time.clock

    # Performance of the normal file read method
    start1 = timer()
    checksum1 = generate_md5(src_file)
    end1 = timer()
    duration1 = end1 - start1

    # Performance of the file read using the iterator method
    # NOTE(review): this run reads the same file second, so it may benefit
    # from any caching done by the OS during the first run.
    start2 = timer()
    checksum2 = generate_md5_with_iterator(src_file)
    end2 = timer()
    duration2 = end2 - start2

    print("checksum: {0}, Without iterator - timetaken: {1}".
          format(checksum1, duration1))
    print("checksum: {0}, With iterator - timetaken: {1}".
          format(checksum2, duration2))

    # Compare the performance; the final branch is reached only when the
    # two measured durations are exactly equal.
    if duration1 > duration2:
        print("Using iterator is faster by {}".
              format(duration1 - duration2))
    elif duration1 < duration2:
        print("Using iterator is slower by {}".
              format(duration2 - duration1))
    else:
        print("No significant impact in performance")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment