Skip to content

Instantly share code, notes, and snippets.

@agoose77
Created July 18, 2016 15:20
Show Gist options
  • Save agoose77/a19335980def75bc0f61d14d2cb6694d to your computer and use it in GitHub Desktop.
Save agoose77/a19335980def75bc0f61d14d2cb6694d to your computer and use it in GitHub Desktop.
import os
import hashlib
import argparse
# Number of (hash, size, path) records buffered in memory before the script
# flushes them to the output file.
MAX_HASH = 500
def dump(hashes, f):
    """Flush buffered hash records to ``f`` and empty the buffer.

    Each record is written as one ``hash,size,path`` line. Fields are not
    quoted, so a path that itself contains a comma yields a line with more
    than three comma-separated fields.

    Args:
        hashes: Mutable list of 3-item records; it is cleared in place once
            its contents have been written.
        f: Writable text file object that receives the lines.
    """
    if not hashes:
        # Nothing buffered: avoid emitting a stray blank line and a
        # misleading progress message.
        return

    count = len(hashes)
    as_str = "\n".join(["{},{},{}".format(*data) for data in hashes])
    f.write(as_str + "\n")
    hashes.clear()
    # Report what was actually written; the final flush is usually a
    # partial batch.
    print("Dumped {}".format(count))
def md5(fname):
    """Compute the MD5 digest and byte length of the file at ``fname``.

    The file is opened in binary mode and read in 4096-byte pieces, so it is
    never held in memory as a whole.

    Args:
        fname: Path of the file to read.

    Returns:
        A ``(hexdigest, size)`` tuple: the hexadecimal MD5 digest string and
        the total number of bytes read.
    """
    digest = hashlib.md5()
    total_bytes = 0
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
            total_bytes += len(block)
    return digest.hexdigest(), total_bytes
if __name__ == "__main__":
    # Script entry point: walk the directory given on the command line and
    # write one "hash,size,path" line per readable file to output.txt in the
    # current working directory.
    import sys  # local import: only used to report skipped files

    parser = argparse.ArgumentParser(description='Build file hash table')
    parser.add_argument('root', type=str, help='directory to iter from')
    args = parser.parse_args()

    output_name = "output.txt"
    output_path = os.path.abspath(output_name)

    hashes = []
    with open(output_name, "w") as outputf:
        for root, dirs, filenames in os.walk(args.root):
            for filename in filenames:
                path = os.path.join(root, filename)

                # Do not hash the report while it is being written; its
                # digest would be meaningless.
                if os.path.abspath(path) == output_path:
                    continue

                try:
                    digest, size = md5(path)
                except OSError as err:
                    # Unreadable entries (permissions, dangling symlinks,
                    # files removed mid-walk) must not abort the whole run.
                    print("Skipping {}: {}".format(path, err), file=sys.stderr)
                    continue

                hashes.append((digest, size, path))

                # Flush in batches so memory use stays bounded.
                if len(hashes) >= MAX_HASH:
                    dump(hashes, outputf)

        # Write whatever is left over from the last partial batch.
        if hashes:
            dump(hashes, outputf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment