Skip to content

Instantly share code, notes, and snippets.

@synodriver
Created February 18, 2024 07:01
Show Gist options
  • Save synodriver/8872e4cdf1ed8b0ba1d2be648f7193ef to your computer and use it in GitHub Desktop.
Save synodriver/8872e4cdf1ed8b0ba1d2be648f7193ef to your computer and use it in GitHub Desktop.
hash folder
import argparse
import hashlib
import os
import sys
from concurrent.futures import ThreadPoolExecutor
from typing import IO
def hash_filename(filename: str, chunk_size=2 * 10**6) -> str:
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and fed to the hash in pieces of
    *chunk_size* bytes, so the whole file is never held in memory at once.

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes requested per read. Defaults to 2 MB.
            (The previous default expression ``2 * 10 * 6`` evaluated to
            only 120 bytes, which made hashing large files needlessly slow.)

    Returns:
        The MD5 digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is 0; a zero-byte read returns ``b""``
            immediately and would yield the digest of empty input for
            every file.
        OSError: If the file cannot be opened or read (for example
            ``FileNotFoundError`` when it does not exist).
    """
    if chunk_size == 0:
        raise ValueError("chunk_size must not be 0")
    md = hashlib.md5()
    with open(filename, "rb") as f:
        # iter() with a sentinel keeps calling f.read until it returns b""
        # (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md.update(chunk)
    return md.hexdigest()
def compare_file(file1: str, file2: str) -> bool:
    """Report whether two files have the same MD5 digest.

    Each path is passed to ``hash_filename`` (``file1`` first) and the two
    resulting hex digests are compared. Any exception raised while hashing
    either file propagates to the caller.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        ``True`` when both digests are equal, ``False`` otherwise.
    """
    first_digest = hash_filename(file1)
    second_digest = hash_filename(file2)
    return first_digest == second_digest
def task(abspath: str, root1: str, root2: str, log: IO[str] = None) -> None:
    """Compare one file under *root1* with its counterpart under *root2*.

    The counterpart path is built by taking *abspath* relative to *root1*
    and joining that onto *root2*. The outcome is printed as a single line.

    Args:
        abspath: Path of a file located under *root1*.
        root1: Root of the source tree that *abspath* belongs to.
        root2: Root of the tree expected to mirror *root1*.
        log: Text stream that receives every message. When omitted,
            matches go to ``sys.stdout`` and mismatches / missing files
            go to ``sys.stderr``.
    """
    # Map by relative location instead of str.replace: replace() rewrites
    # every occurrence of root1 in the path (e.g. root1="." would also hit
    # the dot in "a.txt") and loses the separator when root1 ends with one.
    another_path = os.path.join(root2, os.path.relpath(abspath, root1))
    try:
        matches = compare_file(abspath, another_path)
    except FileNotFoundError:
        print(f"File {another_path} doesn't exist", file=log or sys.stderr)
        return
    if matches:
        print(f"File {abspath} matches {another_path}", file=log or sys.stdout)
    else:
        print(
            f"File {abspath} doesn't match {another_path} with wrong md5",
            file=log or sys.stderr,
        )
def compare_dir(root1: str, root2: str, log: str, workers=10) -> None:
    """Compare every file under *root1* with its counterpart under *root2*.

    Walks *root1* recursively and submits one ``task`` call per file to a
    thread pool; each task writes its result line to the log file.

    Args:
        root1: Root of the source tree to walk.
        root2: Root of the tree that is expected to mirror *root1*.
        log: Path of the log file; it is created or truncated and written
            with GBK encoding.
        workers: Maximum number of worker threads.
    """
    # The log file must be the OUTER context: the pool's __exit__ waits for
    # all submitted tasks, so the file stays open until the last task has
    # written its line. With the nesting reversed the file would be closed
    # while tasks were still running.
    # errors="backslashreplace" keeps a file name that GBK cannot encode
    # from raising inside a worker and dropping that log line.
    with open(log, "w", encoding="gbk", errors="backslashreplace") as logfile:
        with ThreadPoolExecutor(max_workers=workers) as pool:
            futures = []
            for dirpath, _, filenames in os.walk(root1):
                for file in filenames:
                    abspath = os.path.join(dirpath, file)
                    futures.append(
                        pool.submit(task, abspath, root1, root2, logfile)
                    )
            # An exception raised inside a worker is stored on its future
            # and would otherwise vanish; surface it instead.
            for future in futures:
                error = future.exception()
                if error is not None:
                    print(f"Comparison task failed: {error!r}", file=sys.stderr)
def main():
    """Command-line entry point: parse options and compare two directories.

    Options:
        -s / --src      source directory (required)
        -d / --dst      destination directory (required)
        -w / --workers  number of worker threads (int, default 10)
        --log           path of the log file (default "compare.log")
    """
    cli = argparse.ArgumentParser(description="recursive compare two directories")
    # Registered in this order so the generated help text lists them
    # the same way.
    option_specs = (
        (("-s", "--src"), {"required": True}),
        (("-d", "--dst"), {"required": True}),
        (("-w", "--workers"), {"default": 10, "type": int}),
        (("--log",), {"default": "compare.log", "type": str}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()
    source, destination, thread_count = options.src, options.dst, options.workers
    print(f"comparing {source} to {destination} with {thread_count} threads")
    compare_dir(source, destination, options.log, thread_count)


# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment