Last active
January 27, 2020 23:02
-
-
Save slavanap/7cc8d04cc742388cb6eb9b0389f6feb4 to your computer and use it in GitHub Desktop.
Organize files while changing folder structure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import hashlib | |
import os | |
import os.path | |
import pickle | |
import sys | |
# Default number of bytes read per chunk when hashing or comparing files (16 MiB).
BUF_SIZE = 16 * 1024 * 1024
def get_hash(fn, buf_size=None):
    """Return a content key for the file at *fn*.

    The file is opened in binary mode and read chunk by chunk, so it is
    never held in memory as a whole.

    Args:
        fn: Path of the file to read.
        buf_size: Number of bytes to read per chunk. When ``None`` the
            module-level ``BUF_SIZE`` is used.

    Returns:
        A string of the form
        ``"<bytes read>:<sha256 hex digest>:<digest_size>:<block_size>"``.

    Raises:
        ValueError: If *buf_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    if buf_size is None:
        buf_size = BUF_SIZE
    if buf_size <= 0:
        # read(0) would look like EOF and read(-1) would slurp the whole file.
        raise ValueError("buf_size must be positive")

    sha256 = hashlib.sha256()
    size = 0
    with open(fn, "rb") as f:
        # iter() with a sentinel stops on the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(buf_size), b""):
            size += len(chunk)
            sha256.update(chunk)
    return "{}:{}:{}:{}".format(
        size, sha256.hexdigest(), sha256.digest_size, sha256.block_size
    )
def equal_files(filename1, filename2, buf_size=None):
    """Return True when both files have byte-for-byte identical content.

    Sizes are compared first so that files of different length are rejected
    without reading them; otherwise both files are read in lockstep, one
    chunk at a time.

    Args:
        filename1: Path of the first file.
        filename2: Path of the second file.
        buf_size: Number of bytes to read per chunk. When ``None`` the
            module-level ``BUF_SIZE`` is used.

    Returns:
        ``True`` if the contents are identical, ``False`` otherwise.

    Raises:
        ValueError: If *buf_size* is not positive.
        OSError: If either file cannot be stat'ed, opened or read.
    """
    if buf_size is None:
        buf_size = BUF_SIZE
    if buf_size <= 0:
        # read(0) returns b"" for both files, which would report "equal".
        raise ValueError("buf_size must be positive")

    if os.path.getsize(filename1) != os.path.getsize(filename2):
        return False

    with open(filename1, "rb") as f1, open(filename2, "rb") as f2:
        while True:
            d1 = f1.read(buf_size)
            d2 = f2.read(buf_size)
            if d1 != d2:
                return False
            if not d1:
                # Both reads were empty and equal: EOF reached on both files.
                return True
def _index_keep_tree(keep_root):
    """Walk *keep_root* and map each content key to the first path having it."""
    keep = {}
    for dirpath, _dirnames, filenames in os.walk(keep_root):
        for filename in filenames:
            fn = os.path.join(dirpath, filename)
            h = get_hash(fn)
            keep_fn = keep.get(h)
            if keep_fn is None:
                keep[h] = fn
            elif os.path.getsize(fn) > 0 and not equal_files(fn, keep_fn):
                # Same key but different bytes: report it, keep the first path.
                print("KEEP COLLISION: '{}' and '{}'".format(fn, keep_fn))
    return keep


def _remove_duplicates(clear_root, keep):
    """Delete every file under *clear_root* whose content is present in *keep*."""
    for dirpath, _dirnames, filenames in os.walk(clear_root):
        for filename in filenames:
            fn = os.path.join(dirpath, filename)
            keep_fn = keep.get(get_hash(fn))
            if keep_fn is None:
                # Content exists only in the tree being cleared; leave it.
                continue
            if os.path.realpath(fn) == os.path.realpath(keep_fn):
                # The trees overlap (or one path is a symlink to the other):
                # removing this file would destroy the kept copy itself.
                print("SKIP (is the kept copy): {}".format(fn))
                continue
            if os.path.getsize(fn) > 0 and not equal_files(fn, keep_fn):
                print("COLLISION: '{}' and '{}'".format(fn, keep_fn))
                continue
            print("REMOVE: {}".format(fn))
            os.remove(fn)


def _remove_empty_dirs(root):
    """Remove directories under *root* (and *root* itself) that are empty."""
    # Bottom-up walk: children are visited before their parent, so a parent
    # that only contained now-removed directories is seen as empty in turn.
    for dirpath, _dirnames, _filenames in os.walk(root, topdown=False):
        # Re-list at removal time; the walk's own listing may be stale.
        if not os.listdir(dirpath):
            os.rmdir(dirpath)


def main(keepPath, clearPath, reuse_index=False):
    """Delete files under *clearPath* that already exist under *keepPath*.

    Files are matched by content (size + SHA-256 key from ``get_hash``) and
    confirmed with a byte-for-byte comparison before deletion. Progress is
    reported on stdout with ``REMOVE``, ``COLLISION``, ``KEEP COLLISION`` and
    ``SKIP`` lines. Afterwards, empty directories under *clearPath*
    (including *clearPath* itself) are removed.

    Args:
        keepPath: Root of the directory tree whose files are kept.
        clearPath: Root of the directory tree to remove duplicates from.
        reuse_index: When ``False`` (default) the index of *keepPath* is
            built and written to ``keep.p`` in the current working
            directory. When ``True`` the index is loaded from an existing
            ``keep.p`` instead of re-hashing *keepPath*.

    Raises:
        OSError: If a file or directory cannot be read, removed, or if
            ``keep.p`` cannot be written/read.
    """
    if reuse_index:
        # SECURITY: pickle.load can execute arbitrary code. Only reuse a
        # keep.p that this script wrote itself, never one from another source.
        with open("keep.p", "rb") as f:
            keep = pickle.load(f)
    else:
        keep = _index_keep_tree(keepPath)
        with open("keep.p", "wb") as f:
            pickle.dump(keep, f)

    _remove_duplicates(clearPath, keep)
    _remove_empty_dirs(clearPath)
# Script entry point: expects the directory to keep and the directory to clear.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: {} KEEP_DIR CLEAR_DIR".format(sys.argv[0]))
    main(keepPath=sys.argv[1], clearPath=sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment