Skip to content

Instantly share code, notes, and snippets.

@TimSC
Created May 11, 2014 12:27
Show Gist options
  • Save TimSC/164ebcd3f5ab1a4b1c95 to your computer and use it in GitHub Desktop.
Save TimSC/164ebcd3f5ab1a4b1c95 to your computer and use it in GitHub Desktop.
Recursively compare files in two folders
#Recursively compare files in two folders
#by Tim Sheerman-Chase, 2014
#Released under the CC0 license
import sys, os, hashlib
def _file_md5(path, chunk_size=65536):
    """Return the raw MD5 digest of the file at *path*.

    The file is opened in binary mode and hashed in fixed-size chunks so
    that large files are not loaded into memory in one piece and no
    newline/encoding translation can alter the bytes being compared.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.digest()


def CheckFolderPair(fo1, fo2, stats):
    """Recursively compare the contents of folder *fo1* against *fo2*.

    Only the entries found in *fo1* are examined, so anything that exists
    solely in *fo2* is not reported.

    Args:
        fo1: Path of the reference folder whose entries are enumerated.
        fo2: Path of the folder expected to mirror *fo1*.
        stats: Mutable mapping updated in place. It must already contain
            the integer counters 'missing-file2', 'missing-dir2',
            'wrong-size' and 'wrong-hash'.

    Side effects:
        Prints each visited path together with the running counters, and
        prints both digests (as lists of byte values) on a hash mismatch.
    """
    for fina in os.listdir(fo1):
        # Ignore certain types of system folders on NAS box
        if fina in ('#recycle', '@eaDir'):
            continue

        finaFull1 = os.path.join(fo1, fina)
        finaFull2 = os.path.join(fo2, fina)

        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("{0} {1}".format(finaFull1, stats))

        if os.path.isfile(finaFull1):
            if not os.path.isfile(finaFull2):
                stats['missing-file2'] += 1
            elif os.stat(finaFull1).st_size != os.stat(finaFull2).st_size:
                # Different sizes can never hash equal; skip reading the data.
                stats['wrong-size'] += 1
            else:
                # Same size: compare content hashes.
                digest1 = _file_md5(finaFull1)
                digest2 = _file_md5(finaFull2)
                if digest1 != digest2:
                    # bytearray yields integer byte values on Python 2 and 3.
                    print(list(bytearray(digest1)))
                    print(list(bytearray(digest2)))
                    stats['wrong-hash'] += 1

        if os.path.isdir(finaFull1):
            if os.path.isdir(finaFull2):
                CheckFolderPair(finaFull1, finaFull2, stats)
            else:
                stats['missing-dir2'] += 1
if __name__ == "__main__":
    # Script entry point: compare the two folders named on the command line.
    if len(sys.argv) < 3:
        print("Usage: {0} folder1 folder2".format(sys.argv[0]))
        # A missing argument is an error, so report a non-zero status, and
        # use sys.exit because the bare exit() helper is only provided by
        # the site module.
        sys.exit(1)

    # Counters updated in place by CheckFolderPair.
    stats = {
        'missing-file2': 0,
        'missing-dir2': 0,
        'wrong-size': 0,
        'wrong-hash': 0,
    }
    CheckFolderPair(sys.argv[1], sys.argv[2], stats)

    # The per-entry progress lines are printed before each entry is
    # compared, so print the final totals explicitly once the walk is done.
    print(stats)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment