Skip to content

Instantly share code, notes, and snippets.

@Alexhuszagh
Last active June 18, 2016 20:44
Show Gist options
  • Save Alexhuszagh/9f26c35ec55f3897b0ad46cf8e8c7dab to your computer and use it in GitHub Desktop.
Save Alexhuszagh/9f26c35ec55f3897b0ad46cf8e8c7dab to your computer and use it in GitHub Desktop.
Sampled MD5 Example
#!/usr/bin/env python
'''
Calculate an MD5 sum of a file by sampling it: only ``read`` bytes out of every ``interval`` bytes are hashed.
'''
import hashlib
# Path of the file opened and hashed when this module is run as a script.
# The value is a placeholder; replace it with a real path before running.
PATH = 'path/to/file'
def md5sum(file, start=0, interval=1024**2, read=1024):
    '''
    Calculate an MD5 digest from evenly spaced samples of a file.

    Beginning at byte offset ``start``, the file is treated as consecutive
    windows of ``interval`` bytes, and only the last ``read`` bytes of each
    window are fed to the hash. A final window that is cut short by the end
    of the file contributes whatever part of its sample lies before the end
    of the file. If the file ends before the first sample begins, nothing is
    hashed and the digest of empty input is returned, so this is not a
    substitute for a full checksum on small files.

    Args:
        file: A seekable file object opened in binary mode. Its position is
            moved by this function and is not restored afterwards.
        start: Byte offset at which the first window begins.
        interval: Size of each window in bytes (distance between samples).
        read: Number of bytes hashed from the end of each window.

    Returns:
        The raw (binary) MD5 digest of the concatenated samples.

    Raises:
        ValueError: If ``start`` or ``read`` is negative, or ``interval`` is
            smaller than ``read`` or not positive. Such values would
            otherwise cause negative seeks or a loop that never advances.
    '''
    if start < 0 or read < 0 or interval < max(read, 1):
        raise ValueError(
            'expected start >= 0, read >= 0 and interval >= max(read, 1); '
            'got start=%r, interval=%r, read=%r' % (start, interval, read))

    # Find the file size by seeking to the end (whence=2).
    file.seek(0, 2)
    size = file.tell()

    md5 = hashlib.md5()
    # Each sample sits at the tail of its window, so the first one starts
    # ``interval - read`` bytes after ``start``. Using absolute offsets
    # avoids seeking past the end of the file and guarantees termination.
    first_sample = start + interval - read
    for offset in range(first_sample, size, interval):
        file.seek(offset)
        md5.update(file.read(read))
    return md5.digest()
if __name__ == '__main__':
    # Script entry point: hash the file at PATH and print the raw digest
    # returned by md5sum().
    with open(PATH, mode='rb') as handle:
        digest = md5sum(handle)
    print(digest)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment