Skip to content

Instantly share code, notes, and snippets.

@blha303
Last active December 3, 2022 21:03
Show Gist options
  • Save blha303/4c87ec7875edeea1c1398eb0c1cc09a5 to your computer and use it in GitHub Desktop.
Save blha303/4c87ec7875edeea1c1398eb0c1cc09a5 to your computer and use it in GitHub Desktop.
A tool that generates MD5 checksums for all files under the current directory (including subdirectories) and reports any file whose checksum no longer matches the one recorded in an existing checksums.json
#!/usr/bin/env python
import os
import json
from hashlib import md5
# Directory whose tree is checksummed: wherever the script is launched from.
rootdir = os.getcwd()
# Name of the JSON file, stored in rootdir, that holds the recorded checksums.
CHECKSUMS = "checksums.json"

# Load the checksums recorded by a previous run. Start from an empty mapping
# when the file is missing, unreadable, or does not hold a JSON object.
try:
    with open(os.path.join(rootdir, CHECKSUMS)) as f:
        output = json.load(f)
except (OSError, ValueError):
    # OSError: file absent or unreadable. ValueError: malformed JSON or bad
    # encoding (json.JSONDecodeError and UnicodeDecodeError both derive from
    # it). Anything else, e.g. KeyboardInterrupt, is allowed to propagate.
    output = {}
if not isinstance(output, dict):
    # The rest of the script indexes this by file path, so valid JSON that is
    # not an object (a list, a number, ...) is treated like a missing file.
    output = {}
def md5sum(filename, chunk_size=None):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size chunks so that large files are never
    loaded into memory in one piece (approach taken from
    http://stackoverflow.com/a/24847608).

    Args:
        filename: Path of the file to hash; it is opened in binary mode.
        chunk_size: Number of bytes to read per iteration. When omitted,
            128 times the MD5 block size is used.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is given and is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    digest = md5()
    if chunk_size is None:
        chunk_size = 128 * digest.block_size
    elif chunk_size < 1:
        # read(0) would look like EOF and read(-1) would slurp the whole
        # file, so reject sizes that defeat the chunked loop below.
        raise ValueError("chunk_size must be a positive integer")
    with open(filename, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # which is what a binary file yields at end-of-file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
def main():
    """Checksum every file under ``rootdir`` and record or verify the result.

    Walks ``rootdir`` recursively. For each file, the MD5 digest is computed
    with ``md5sum()`` and keyed by the file's path relative to ``rootdir``:

    * a path not yet present in ``output`` is added with its digest;
    * a path already present whose digest differs is reported on stdout as
      ``Mismatch on <path>``; the stored digest is left untouched.

    Files named ``CHECKSUMS`` are skipped at every directory level. After
    the walk, ``output`` is written as indented JSON to ``CHECKSUMS`` inside
    ``rootdir``.
    """
    for folder, _subs, files in os.walk(rootdir):
        for filename in files:
            if filename == CHECKSUMS:
                continue
            path = os.path.join(folder, filename)
            # relpath instead of slicing off len(rootdir) + 1 characters: the
            # slice eats the first character of the name whenever rootdir
            # already ends in a separator (e.g. "/" or "C:\\").
            fn = os.path.relpath(path, rootdir)
            checksum = md5sum(path)
            if fn not in output:
                output[fn] = checksum
            elif output[fn] != checksum:
                # replace with code to redownload given file?
                print("Mismatch on {}".format(fn))
    # Pretty prints json to output file
    with open(os.path.join(rootdir, CHECKSUMS), "w") as f:
        json.dump(output, f, indent=4)
# Only scan when executed as a script, so that importing this module (e.g. to
# reuse md5sum) does not walk the directory tree or rewrite the checksums file.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment