Skip to content

Instantly share code, notes, and snippets.

@lispandfound
Last active December 30, 2017 08:56
Show Gist options
  • Save lispandfound/d2b4c1868f65080a80dbb8e30775feee to your computer and use it in GitHub Desktop.
Save lispandfound/d2b4c1868f65080a80dbb8e30775feee to your computer and use it in GitHub Desktop.
duplicates.py
from __future__ import print_function
import hashlib
import os
import subprocess
import sys
def get_output(command):
    """Run *command* and return everything it wrote to standard output.

    Args:
        command: Either a shell command line given as a single string, which
            is executed through the shell, or a list/tuple of program
            arguments, which is executed directly without a shell.

    Returns:
        The captured standard output of the command, exactly as returned by
        ``subprocess.check_output``.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    # An argument list must not be combined with shell=True: on POSIX only
    # its first item would be interpreted as the command line. Strings keep
    # going through the shell so existing callers are unaffected.
    use_shell = not isinstance(command, (list, tuple))
    return subprocess.check_output(command, shell=use_shell)
def _md5_of_file(path, chunk_size=65536):
    """Return the MD5 hex digest of the file at *path*, read in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as handle:
        # Read fixed-size chunks so large files are never loaded whole.
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(directory):
    """Group the regular files directly inside *directory* by MD5 digest.

    Hidden entries (names starting with ``.``) and anything that is not a
    regular file, such as sub-directories, are ignored. The scan does not
    descend into sub-directories.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A dict mapping each MD5 hex digest to the list of file names
        (relative to *directory*, in sorted order) whose contents produce
        that digest. A list with more than one name is a set of duplicates.

    Raises:
        OSError: If *directory* cannot be listed or a file cannot be read.
    """
    duplicates = {}
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # "first" file of every group predictable.
    for fp in sorted(os.listdir(directory)):
        fullpath = os.path.join(directory, fp)
        # Check the bare name, not the joined path: the joined path starts
        # with '.' only when the directory argument itself does (e.g. '.'),
        # which would wrongly skip every file.
        if fp.startswith('.') or not os.path.isfile(fullpath):
            continue
        # Hash in-process instead of shelling out to openssl, so file names
        # containing quotes or other shell metacharacters are handled safely.
        duplicates.setdefault(_md5_of_file(fullpath), []).append(fp)
    return duplicates
def main():
    """Interactively move duplicate files of a directory out of the way.

    The directory to scan is taken from ``sys.argv[1]``; when it is missing a
    usage message is printed and nothing else happens. A ``duplicates``
    sub-directory is created inside the target directory if it does not
    exist. For every group of files reported by ``find_duplicates`` that
    contains more than one name, the names are printed and the user is asked
    twice for confirmation (answering exactly ``y`` both times) before all
    files except the first are moved into the ``duplicates`` sub-directory.
    """
    # sys.argv always contains the script name, so a path argument is
    # present only when there are at least two entries.
    if len(sys.argv) < 2:
        print('Usage: duplicates.py <path>')
        return

    # raw_input exists only on Python 2; on Python 3 input() is the
    # equivalent that returns the typed line as a string.
    try:
        ask = raw_input  # noqa: F821
    except NameError:
        ask = input

    directory = sys.argv[1]
    duplicates_path = os.path.join(directory, 'duplicates')
    if not os.path.exists(duplicates_path):
        os.mkdir(duplicates_path)

    for files in find_duplicates(directory).values():
        if len(files) <= 1:
            continue
        print('The following files have the same md5sum:')
        print('\n'.join(files))
        if ask('Keep only the first? ') != 'y':
            continue
        extras = files[1:]
        if ask('This will MOVE the following to the duplicates directory: {}, continue? '.format(' '.join(extras))) != 'y':
            continue
        for name in extras:
            # NOTE: on POSIX os.rename silently replaces an existing file
            # with the same name in the duplicates directory.
            os.rename(os.path.join(directory, name), os.path.join(duplicates_path, name))
# Run the interactive tool only when executed as a script, so that importing
# this module (e.g. to reuse its functions) has no side effects.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment