Skip to content

Instantly share code, notes, and snippets.

@kyl191
Last active August 29, 2015 14:12
Show Gist options
  • Save kyl191/a24ae3ef016eab038e9a to your computer and use it in GitHub Desktop.
Save kyl191/a24ae3ef016eab038e9a to your computer and use it in GitHub Desktop.
import hashlib, os, sys, re
from os.path import join, getsize
def sha512file(file):
    """Return the hex-encoded SHA-512 digest of the file at path *file*.

    The file is opened in binary mode and consumed in 10240-byte chunks, so
    its whole content is never held in memory at once.

    Args:
        file: Path of the file to hash.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        IOError: If the file cannot be opened (a message is printed first)
            or if reading it fails.
    """
    sha512 = hashlib.sha512()
    try:
        f = open(file, "rb")
    except IOError:
        print("IO Error, unable to open file", file)
        # Re-raise instead of falling through: there is no handle to read
        # from, and returning a placeholder would let two unreadable files
        # look identical to a caller that compares digests.
        raise
    # The context manager guarantees the handle is closed, even when a read
    # fails part-way through.
    with f:
        while True:
            data = f.read(10240)
            if not data:
                break
            sha512.update(data)
    return sha512.hexdigest()
def main(argv=None):
    """Delete files under COMPARE_DIR that duplicate files under SOURCE_DIR.

    SOURCE_DIR is walked recursively. For every file found, the file at the
    same relative path under COMPARE_DIR is looked up; when both exist and
    their SHA-512 digests match, the copy under COMPARE_DIR is removed.
    After each folder is processed, the mirrored folder under COMPARE_DIR is
    removed if it is empty. Files under SOURCE_DIR are never deleted.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name, source
            directory, compare directory). Defaults to ``sys.argv``.

    Returns:
        0 on completion, 2 when the directory arguments are missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print("Usage: %s SOURCE_DIR COMPARE_DIR" % (argv[0] if argv else "script"))
        return 2

    source_dir = os.path.abspath(argv[1])
    compare_dir = os.path.abspath(argv[2])
    deleted_files = 0
    space_saved = 0

    for root, _subfolders, files in os.walk(source_dir):
        # Position of the current folder relative to source_dir ("." for the
        # top level). relpath is used rather than splitting on the source_dir
        # text, which goes wrong when that text recurs deeper in the path
        # (e.g. source "/data" and folder "/data/data").
        pathsuffix = os.path.relpath(root, source_dir)
        dup_folder = os.path.normpath(os.path.join(compare_dir, pathsuffix))

        # Mention what path we're working in. Paths are printed as-is:
        # encoding them first would show a bytes repr on Python 3.
        print("Comparing: %s" % os.path.abspath(root))
        print("To: %s" % os.path.abspath(dup_folder))

        for filename in files:
            dup = os.path.abspath(os.path.join(dup_folder, filename))
            filename = os.path.join(root, filename)

            # Only regular files can be duplicates; a directory with the same
            # name cannot be hashed.
            if not os.path.isfile(dup):
                continue
            # Never treat a file as a duplicate of itself (same or overlapping
            # directories) -- removing it would destroy the only copy.
            if os.path.realpath(filename) == os.path.realpath(dup):
                continue

            try:
                dup_size = os.path.getsize(dup)
                # Files of different sizes cannot match; skip the hashing.
                if os.path.getsize(filename) != dup_size:
                    continue
                hash1 = sha512file(filename)
                hash2 = sha512file(dup)
            except (IOError, OSError) as err:
                # An unreadable file is left alone and the walk carries on.
                print("Skipping %s: %s" % (filename, err))
                continue

            if hash1 != hash2:
                continue

            print("%s:\n %s" % (os.path.abspath(filename), hash1))
            print("%s:\n %s" % (os.path.abspath(dup), hash2))
            print("[%s] Removing %s" % (deleted_files + 1, dup))
            os.remove(dup)
            # Count only after the removal actually succeeded.
            deleted_files += 1
            space_saved += dup_size

        # Drop the mirrored folder once nothing is left inside it.
        if os.path.isdir(dup_folder) and not os.listdir(dup_folder):
            os.rmdir(dup_folder)

    print("Deleted %s, saving %s bytes of space" % (deleted_files, space_saved))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment