Created
October 3, 2018 06:57
-
-
Save IvanBayan/2dd9af4439764df6061eebdce09b7db2 to your computer and use it in GitHub Desktop.
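Clean up a directory (recursively). By default, for each group of files with the same hash, delete every copy except the first one found; with a regexp, delete only the files in each duplicate group whose names match the regexp.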
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib | |
import os | |
import argparse | |
import re | |
def make_hash(filename, chunk_size=6553600):
    """Return the hex MD5 digest of the file at *filename*.

    The file is opened in binary mode and read in pieces of at most
    *chunk_size* bytes, so the whole file is never held in memory at once.

    Args:
        filename: Path of the file to hash.
        chunk_size: Maximum number of bytes to read per iteration.
            Must be a positive integer.

    Returns:
        The MD5 digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) would end the loop immediately and read(-1) would load the
        # entire file at once; neither is what a caller asking for chunks wants.
        raise ValueError("chunk_size must be a positive integer")
    hasher = hashlib.md5()
    with open(filename, 'rb') as afile:
        # iter() with a sentinel keeps calling read() until it returns b''
        # (end of file).
        for buf in iter(lambda: afile.read(chunk_size), b''):
            hasher.update(buf)
    return hasher.hexdigest()
if __name__ == '__main__':
    # Command-line entry point: walk --dir recursively, group files by the
    # hash of their content, then delete duplicates.
    ap = argparse.ArgumentParser()
    ap.add_argument("-d", "--dir", required=True,
                    help="directory to clean up (recursively)")
    ap.add_argument("-r", "--regexp", required=False,
                    help="delete all files with same hash which matched regexp")
    args = vars(ap.parse_args())

    # Compile once up front; None means "no regexp filtering".
    pattern = None
    if args['regexp'] is not None:
        pattern = re.compile(args['regexp'])

    # Maps content hash -> list of full paths with that content.
    hashes = {}
    for directory, subdirs, files in os.walk(args['dir']):
        for ffile in files:
            # Build the path from the directory currently being walked, not
            # from the root, so files inside subdirectories resolve correctly
            # and same-named files in different directories stay distinct.
            path = os.path.join(directory, ffile)
            print("hashing " + path)
            hashes.setdefault(make_hash(path), []).append(path)

    todelete = []
    for paths in hashes.values():
        if len(paths) == 1:
            # Unique content: nothing to clean up.
            continue
        if pattern is not None:
            # The regexp is matched against the file name only (anchored at
            # its start, as re.match does), not against the full path.
            # NOTE: if every copy in the group matches, every copy is deleted.
            todelete.extend(
                p for p in paths if pattern.match(os.path.basename(p)))
        else:
            # Keep the first copy found, delete the rest.
            todelete.extend(paths[1:])

    for f in todelete:
        print("Deleting {}".format(f))
        os.remove(f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment