Last active
December 30, 2017 08:56
-
-
Save lispandfound/d2b4c1868f65080a80dbb8e30775feee to your computer and use it in GitHub Desktop.
duplicates.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import print_function | |
| import sys | |
| import os | |
| import subprocess | |
def get_output(command):
    """Run *command* and return everything it wrote to standard output.

    Args:
        command: Either a shell command line (a string, executed through
            the shell exactly as before) or a list/tuple of arguments,
            which is executed directly without a shell so the arguments
            need no quoting.

    Returns:
        The captured standard output as text.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status.
    """
    # Only string commands go through the shell; argument sequences are
    # executed directly, which avoids quoting and injection problems.
    use_shell = not isinstance(command, (list, tuple))
    # universal_newlines=True makes Python 3 return text instead of bytes,
    # so callers can keep using str methods such as .split(' ').
    return subprocess.check_output(
        command, shell=use_shell, universal_newlines=True)
def find_duplicates(directory):
    """Group the regular files directly inside *directory* by MD5 digest.

    Hidden entries (names starting with '.') and anything that is not a
    regular file (sub-directories, for example) are ignored. The search is
    not recursive.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A dict mapping each hexadecimal MD5 digest to the list of file
        names (relative to *directory*, in sorted order) that have that
        digest. Any list longer than one entry is a set of duplicates.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be
            read.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import hashlib

    duplicates = {}
    # Sorted so the "first" file of every group is deterministic rather
    # than depending on the order os.listdir() happens to return.
    for fp in sorted(os.listdir(directory)):
        fullpath = os.path.join(directory, fp)
        # Test the bare name, not the joined path: the joined path starts
        # with the directory, so checking it would never skip dotfiles and
        # would skip everything when the directory itself is '.' or './x'.
        if fp.startswith('.') or not os.path.isfile(fullpath):
            continue
        # Hash in-process instead of shelling out to `openssl md5`, so
        # file names containing quotes, '$' or backticks are handled
        # safely and no external tool is required.
        digest = hashlib.md5()
        with open(fullpath, 'rb') as handle:
            # Read in 1 MiB chunks to keep memory use flat on large files.
            for chunk in iter(lambda: handle.read(1 << 20), b''):
                digest.update(chunk)
        duplicates.setdefault(digest.hexdigest(), []).append(fp)
    return duplicates
def main():
    """Interactively move duplicate files into a 'duplicates' sub-directory.

    Reads the directory to scan from the first command-line argument. For
    every group of files with the same MD5 digest, the user is asked (with
    a second confirmation) whether to keep only the first file; on 'y' to
    both prompts the remaining files are moved into
    ``<directory>/duplicates``. Nothing is deleted.

    Prints a usage message and returns if no directory argument is given,
    and prints an error and returns if the argument is not a directory.
    """
    # sys.argv[0] is the script name, so a path argument is present only
    # when there are at least two entries.
    if len(sys.argv) < 2:
        print('Usage: duplicates.py <path>')
        return

    directory = sys.argv[1]
    if not os.path.isdir(directory):
        print('Not a directory: {}'.format(directory))
        return

    # Python 2 spells the line-reading prompt raw_input(); Python 3 calls
    # it input(). Pick whichever exists so the script runs on both.
    try:
        ask = raw_input
    except NameError:
        ask = input

    duplicates_path = os.path.join(directory, 'duplicates')
    if not os.path.exists(duplicates_path):
        os.mkdir(duplicates_path)

    for files in find_duplicates(directory).values():
        if len(files) <= 1:
            continue
        print('The following files have the same md5sum:')
        print('\n'.join(files))
        extras = files[1:]
        if ask('Keep only the first? ') != 'y':
            continue
        # Second confirmation, because the move changes the user's files.
        prompt = ('This will MOVE the following to the duplicates '
                  'directory: {}, continue? '.format(' '.join(extras)))
        if ask(prompt) != 'y':
            continue
        for name in extras:
            os.rename(os.path.join(directory, name),
                      os.path.join(duplicates_path, name))
# Run the interactive tool only when executed as a script, so importing
# this module does not start prompting or touching the file system.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment