Created
September 30, 2014 22:11
-
-
Save tierra/b90c4aeb73c7ad8dcef3 to your computer and use it in GitHub Desktop.
Recursively remove duplicate files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
from optparse import OptionParser | |
def remove_duplicates(working_path, compare_path, pretend=False,
                      compare_size=False, recurse=False):
    """Delete files in working_path that also exist in compare_path.

    Files are matched by name only: a regular file in ``working_path`` is
    treated as a duplicate when ``compare_path`` holds a regular file with
    the same name. File contents are never read.

    The two paths should be distinct directories; passing the same
    directory twice matches every file against itself.

    Args:
        working_path: Directory whose duplicate files are deleted.
        compare_path: Reference directory; it is only read, never modified.
        pretend: When true, report what would be deleted but leave the
            file system untouched (no files or directories are removed).
        compare_size: When true, a same-named file is only deleted if both
            files also have the same size in bytes.
        recurse: When true, descend into sub-directories present in both
            trees and remove working sub-directories that end up empty.
    """
    entries = os.listdir(working_path)

    for name in entries:
        working_file = os.path.join(working_path, name)
        compare_file = os.path.join(compare_path, name)
        # isfile() is already False for missing paths, so no separate
        # existence check is needed.
        if not (os.path.isfile(working_file) and os.path.isfile(compare_file)):
            continue
        if compare_size and (
                os.path.getsize(working_file) != os.path.getsize(compare_file)):
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Deleting %s' % working_file)
        if not pretend:
            os.unlink(working_file)

    if not recurse:
        return

    for name in entries:
        working_dir = os.path.join(working_path, name)
        compare_dir = os.path.join(compare_path, name)
        if not (os.path.isdir(working_dir) and os.path.isdir(compare_dir)):
            continue
        remove_duplicates(working_dir, compare_dir,
                          pretend, compare_size, recurse)
        if not os.listdir(working_dir):
            print('Removing empty directory: %s' % working_dir)
            # Honour the dry run: previously an already-empty directory was
            # removed even when pretend was set.
            if not pretend:
                os.rmdir(working_dir)
if __name__ == '__main__':
    # Command-line entry point: three boolean switches followed by exactly
    # two positional arguments (the working path and the compare path).
    parser = OptionParser(
        usage='%prog [-p] [-s] [-r] working_path compare_path',
        version='%prog 0.1')

    # (short flag, long flag, destination attribute, help text)
    switch_specs = (
        ('-p', '--pretend', 'pretend',
         'Shows the files which will be deleted without deleting them.'),
        ('-s', '--size', 'compare_size',
         'Compares the file sizes in addition to names.'),
        ('-r', '--recursive', 'recurse',
         'Make a recursive comparison through directories.'),
    )
    for short_flag, long_flag, dest_name, help_text in switch_specs:
        parser.add_option(short_flag, long_flag,
                          action='store_true', dest=dest_name,
                          default=False, help=help_text)

    options, args = parser.parse_args()

    # parser.error() prints the usage message and exits, so the unpacking
    # below only runs with exactly two positional arguments.
    if len(args) != 2:
        parser.error('Incorrect number of arguments.')

    working_path, compare_path = args
    remove_duplicates(working_path, compare_path, options.pretend,
                      options.compare_size, options.recurse)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment