Skip to content

Instantly share code, notes, and snippets.

@silicontrip
Created July 25, 2016 05:13
Show Gist options
  • Select an option

  • Save silicontrip/f8b5b9f2482eda636240c01625d38678 to your computer and use it in GitHub Desktop.

Select an option

Save silicontrip/f8b5b9f2482eda636240c01625d38678 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import os
import sys
import hashlib
def get_files(path):
    """Return the paths of all files found beneath ``path``.

    The tree is traversed with ``os.walk`` and every file name is joined to
    the directory it was found in.  Directories themselves are not included
    in the result.

    Args:
        path: Root directory to scan.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.  The
        list is empty when nothing is found under ``path``.
    """
    allfiles = []
    for dirpath, _dirnames, filenames in os.walk(path):
        # Only the file names are of interest; sub-directories are reached
        # by os.walk itself on later iterations.
        allfiles.extend(os.path.join(dirpath, name) for name in filenames)
    return allfiles
def md5(fname):
    """Return the hexadecimal MD5 digest of the contents of file ``fname``.

    The file is opened in binary mode and consumed in 4096-byte chunks, so
    the whole file is never held in memory at once.

    Args:
        fname: Path of the file to hash.

    Returns:
        The digest as a string of hexadecimal digits.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        # iter() with a sentinel keeps calling f.read until it returns the
        # empty bytes object, i.e. until end of file.
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
# Command-line driver: hash every file under the directory given as the first
# argument and report each file whose content matches an earlier file.
if len(sys.argv) < 2:
    # Without this check a missing argument surfaces as a bare IndexError.
    sys.exit("usage: " + sys.argv[0] + " <directory>")

af = get_files(sys.argv[1])
fcount = str(len(af))
# print(...) with a single string argument behaves the same on Python 2 and 3.
print("files: " + fcount)

count = 0
dupes = {}  # MD5 hex digest -> paths already seen with that digest, in scan order
for fn in af:
    print(str(count) + "/" + fcount + " : " + fn)
    count += 1
    try:
        md = md5(fn)
    except (IOError, OSError) as err:
        # A file that cannot be opened or read (for example a broken
        # symlink) should not abort the whole scan; report it and move on.
        sys.stderr.write("skipping " + fn + ": " + str(err) + "\n")
        continue
    if md not in dupes:
        dupes[md] = []
    else:
        # Show the earlier file(s) that share this digest.
        print("Duplicate " + str(dupes[md]))
    # Record every file, first occurrences included, so later matches can
    # list all earlier paths.
    dupes[md].append(fn)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment