Last active
April 19, 2018 12:48
-
-
Save jacoor/08c354bd01fa8d5f347c5cc16026409f to your computer and use it in GitHub Desktop.
Duplicate photo cleaner: removes a JPG when a RAW file of the same name is present in the same directory. Uncomment the `os.remove` line (line 27 of the script) to actually delete; otherwise the script only lists the files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove duplicate files: JPGs for which a RAW file of the same name is present.
from pathlib import Path | |
import os | |
class ProcessFiles(object):
    """
    Find JPG files that have a RAW (PEF) sibling of the same name.

    Walks a directory tree (the current directory by default) and, inside
    each directory, pairs every JPG with a RAW file that shares its stem.
    The paired JPG paths are printed one per line, followed by their count
    and their combined size in human-readable form.

    Attributes set by the constructor:
        raws: dict mapping each duplicate JPG path to the path of its RAW
            sibling.
        du: combined size, in bytes, of all duplicate JPGs.
    """

    def __init__(self, root=None, raw_extension="PEF", jpg_extension="jpg",
                 *, delete=False):
        """
        Scan the tree, record the duplicates and print the report.

        Args:
            root: directory to walk; ``None`` means the current directory.
            raw_extension: extension of the RAW files, with or without the
                leading dot; compared case-insensitively.
            jpg_extension: extension of the JPG files, with or without the
                leading dot; compared case-insensitively.
            delete: when true, every duplicate JPG is removed from disk as
                it is listed. Defaults to false (list only).

        Raises:
            OSError: if a duplicate JPG cannot be stat'ed or removed.
        """
        self.raws = {}
        self.du = 0

        # Compare whole suffixes (dot included) so that a name such as
        # "holidayjpg" is not mistaken for a JPG file.
        raw_suffix = "." + raw_extension.lstrip(".").lower()
        jpg_suffix = "." + jpg_extension.lstrip(".").lower()
        top = Path() if root is None else root

        for path, _subdirs, files in os.walk(top):
            # Index the RAW files of this directory by stem. Matching on the
            # directory listing finds "x.pef" as well as "x.PEF", even on
            # case-sensitive filesystems.
            raw_by_stem = {}
            for name in files:
                stem, suffix = os.path.splitext(name)
                if suffix.lower() == raw_suffix:
                    raw_by_stem.setdefault(stem, name)
            if not raw_by_stem:
                continue

            for name in files:
                stem, suffix = os.path.splitext(name)
                if suffix.lower() != jpg_suffix or stem not in raw_by_stem:
                    continue
                file_path = os.path.join(path, name)
                self.raws[file_path] = os.path.join(path, raw_by_stem[stem])
                self.du += Path(file_path).stat().st_size

        for jpg in self.raws:
            if delete:
                os.remove(jpg)
            print(jpg)
        print(len(self.raws))
        print(self.humansize(self.du))

    def humansize(self, nbytes):
        """
        Format a byte count using binary (1024-based) units.

        The value is divided by 1024 until it drops below 1024 or the
        largest unit (PB) is reached, then rendered with at most two
        decimals; trailing zeros and a dangling decimal point are dropped.

        Args:
            nbytes: size in bytes.

        Returns:
            A string such as ``"1.5 KB"`` or ``"0 B"``.
        """
        suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
        i = 0
        while nbytes >= 1024 and i < len(suffixes) - 1:
            nbytes /= 1024.
            i += 1
        # Strip zeros first, then the dot: "100.00" -> "100." -> "100".
        f = ('%.2f' % nbytes).rstrip('0').rstrip('.')
        return '%s %s' % (f, suffixes[i])
if __name__ == "__main__":
    # Script entry point: constructing the object performs the scan of the
    # current directory tree and prints the report.
    ProcessFiles()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment