Last active
January 27, 2016 15:13
-
-
Save vinovator/de3b2c93d8ce80a4b71d to your computer and use it in GitHub Desktop.
Script to generate MD5 checksum for a given file. Uses iterator implementation.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# checksumGenerator.py
# Python 2.7.6
"""
Script to generate the MD5 checksum of a given file.
Also compares the performance of a plain read loop with an
iterator (generator) based implementation.
"""
import hashlib
import time
import timeit
# Path of the file hashed by the benchmark in the ``__main__`` section below.
# It is a relative path, so it is resolved against the current working
# directory when the script runs.
src_file = "md5/sample.xml"
def generate_md5(fname, chunk_size=4096):
    """
    Return the MD5 checksum of a file as a hexadecimal string.

    The file is opened in binary mode and fed to the hash object in
    pieces of ``chunk_size`` bytes, so with a positive ``chunk_size`` only
    one piece is held by this function at a time.

    Args:
        fname: Path of the file to hash.
        chunk_size: Number of bytes requested per read (default 4096).

    Returns:
        The hex digest of the file contents (32 hexadecimal characters).

    Raises:
        ValueError: If ``chunk_size`` is 0.
        IOError / OSError: If the file cannot be opened or read.
    """
    # A zero-byte read returns b"", which is indistinguishable from
    # end-of-file; the loop would stop at once and report the checksum of
    # empty input for every file.
    if chunk_size == 0:
        raise ValueError("chunk_size must not be 0")

    digest = hashlib.md5()
    with open(fname, "rb") as f:
        # iter(callable, sentinel) keeps calling f.read() until it returns
        # the empty bytes object that marks end-of-file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    # Return the hex checksum
    return digest.hexdigest()
def read_file(fname, chunk_size=4096):
    """
    Generator which reads a file in binary mode and yields it block by block.

    Args:
        fname: Path of the file to read.
        chunk_size: Number of bytes requested per read (default 4096).

    Yields:
        Successive non-empty byte strings from the file, in order. With a
        positive ``chunk_size`` each block is at most ``chunk_size`` bytes.

    Raises:
        ValueError: If ``chunk_size`` is 0.
        IOError / OSError: If the file cannot be opened or read.

    Because this is a generator, nothing runs (including the argument
    check and opening the file) until the first block is requested, and
    the file stays open until the generator is exhausted or closed.
    """
    # A zero-byte read returns b"", which looks exactly like end-of-file,
    # so a non-empty file would silently produce no blocks at all.
    if chunk_size == 0:
        raise ValueError("chunk_size must not be 0")

    # Open the file in read binary mode
    with open(fname, "rb") as f:
        # Call f.read() repeatedly until it returns the empty bytes object
        # that marks end-of-file, handing out one block at a time.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            yield chunk
def generate_md5_with_iterator(fname, chunk_size=4096):
    """
    Return the MD5 checksum of a file, reading it through ``read_file``.

    Args:
        fname: Path of the file to hash.
        chunk_size: Number of bytes requested per read, passed on to
            ``read_file`` (default 4096, the same default ``read_file``
            itself uses).

    Returns:
        The hex digest of the file contents (32 hexadecimal characters).
    """
    digest = hashlib.md5()
    # Consume the generator block by block until the file is exhausted
    for chunk in read_file(fname, chunk_size):
        digest.update(chunk)
    return digest.hexdigest()
if __name__ == "__main__":
    # Benchmark: hash src_file once with each implementation and report the
    # checksums and elapsed times.
    #
    # timeit.default_timer is used instead of time.clock(): time.clock()
    # was removed in Python 3.8, while default_timer exists on both
    # Python 2.7 and Python 3.
    # NOTE(review): each method is timed with a single run, and the second
    # run reads a file the first run has just read, so the comparison is
    # only a rough indication.

    # Performance of normal file read method
    start1 = timeit.default_timer()
    checksum1 = generate_md5(src_file)
    end1 = timeit.default_timer()
    duration1 = end1 - start1

    # Performance of file read using iterator method
    start2 = timeit.default_timer()
    checksum2 = generate_md5_with_iterator(src_file)
    end2 = timeit.default_timer()
    duration2 = end2 - start2

    print("checksum: {0}, Without iterator - timetaken: {1}".
          format(checksum1, duration1))
    print("checksum: {0}, With iterator - timetaken: {1}".
          format(checksum2, duration2))

    # Compare the performance
    if duration1 > duration2:
        print("Using iterator is faster by {}".
              format(duration1 - duration2))
    elif duration1 < duration2:
        print("Using iterator is slower by {}".
              format(duration2 - duration1))
    else:
        # Reached only when both measured durations are exactly equal
        print("No significant impact in performance")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment