Skip to content

Instantly share code, notes, and snippets.

@bshillingford
Last active December 17, 2015 06:28
Show Gist options
  • Save bshillingford/5565243 to your computer and use it in GitHub Desktop.
Save bshillingford/5565243 to your computer and use it in GitHub Desktop.
Duplicate finder
#!/usr/bin/python2.7
import fnmatch
import hashlib
import os
import subprocess
import sys
from collections import defaultdict
from glob import glob
def file_md5(path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in binary mode, *chunk_size* bytes at a time, so large
    files are never loaded into memory in one piece.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(pattern, root="."):
    """Group regular files under *root* whose contents are identical.

    pattern -- shell-style wildcard matched against each file's base name.
    root    -- directory to search recursively (defaults to the current one).

    Returns a sorted list of groups; each group is a sorted list of two or
    more paths whose contents share one MD5 digest.  Files that cannot be
    read are reported on stderr and left out instead of aborting the scan.
    """
    by_digest = defaultdict(list)
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            # Case-sensitive wildcard match on the base name only.  The
            # pattern is never handed to a shell, so quotes and other
            # metacharacters in it are harmless.
            if not fnmatch.fnmatchcase(name, pattern):
                continue
            path = os.path.join(dirpath, name)
            # Only hash regular files: skip symlinks and special files such
            # as FIFOs, which could block forever when opened.
            if os.path.islink(path) or not os.path.isfile(path):
                continue
            try:
                digest = file_md5(path)
            except (IOError, OSError) as err:
                sys.stderr.write("skipping {}: {}\n".format(path, err))
                continue
            by_digest[digest].append(path)
    # Sorting makes the report deterministic from run to run.
    return sorted(
        sorted(paths) for paths in by_digest.values() if len(paths) >= 2
    )


def main(argv=None):
    """Command-line entry point: print groups of duplicate files.

    argv -- argument list without the program name; defaults to sys.argv[1:].
            The first argument is the file-name pattern; any further
            arguments are ignored.

    Returns the process exit status: 0 on success, 2 when the pattern is
    missing.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.stderr.write("usage: {} PATTERN\n".format(sys.argv[0]))
        return 2
    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print("Groups below (grouped by =======) are exact dupl.")
    for group in find_duplicates(args[0]):
        print('====================================')
        for path in group:
            print(path)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment