Created
May 11, 2014 12:27
-
-
Save TimSC/164ebcd3f5ab1a4b1c95 to your computer and use it in GitHub Desktop.
Recursively compare files in two folders
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Recursively compare files in two folders
#by Tim Sheerman-Chase, 2014
#Released under the CC0 license
import sys, os, hashlib | |
def _md5_of_file(path, chunk_size=65536):
    """Return the raw MD5 digest of the file at *path*.

    The file is opened in binary mode so the bytes hashed are exactly the
    bytes on disk, and it is read in chunks of *chunk_size* bytes so large
    files are never held in memory in full.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.digest()


def CheckFolderPair(fo1, fo2, stats):
    """Recursively check that everything under *fo1* also exists in *fo2*.

    The comparison is one-directional: only the entries listed in *fo1* are
    visited, so entries that exist only in *fo2* are not reported.

    For every entry of *fo1* (except the ignored NAS system folders):
      * a file missing from *fo2* increments ``stats['missing-file2']``;
      * a file whose size differs increments ``stats['wrong-size']``;
      * a file of equal size whose MD5 differs increments
        ``stats['wrong-hash']`` (both digests are printed as byte lists);
      * a directory missing from *fo2* increments ``stats['missing-dir2']``;
      * a directory present in both is compared recursively.

    Entries that are neither a file nor a directory are printed but not
    counted.

    Args:
        fo1: Path of the reference folder.
        fo2: Path of the folder checked against *fo1*.
        stats: Dict of counters, updated in place. It must already contain
            the keys 'missing-file2', 'missing-dir2', 'wrong-size' and
            'wrong-hash'.
    """
    for fina in os.listdir(fo1):
        #Ignore certain types of system folders on NAS box
        if fina in ['#recycle', '@eaDir']:
            continue

        finaFull1 = os.path.join(fo1, fina)
        finaFull2 = os.path.join(fo2, fina)

        # Progress output: current path plus the running counters.
        # A single pre-formatted argument prints the same on Python 2 and 3.
        print("%s %s" % (finaFull1, stats))

        if os.path.isfile(finaFull1):
            if not os.path.isfile(finaFull2):
                stats['missing-file2'] += 1
            elif os.stat(finaFull1).st_size != os.stat(finaFull2).st_size:
                # Different sizes can never hash equal; skip the hashing.
                stats['wrong-size'] += 1
            else:
                #Compare hashes
                digest1 = _md5_of_file(finaFull1)
                digest2 = _md5_of_file(finaFull2)
                if digest1 != digest2:
                    # bytearray yields ints on both Python 2 and 3.
                    print(list(bytearray(digest1)))
                    print(list(bytearray(digest2)))
                    stats['wrong-hash'] += 1

        if os.path.isdir(finaFull1):
            if os.path.isdir(finaFull2):
                CheckFolderPair(finaFull1, finaFull2, stats)
            else:
                stats['missing-dir2'] += 1
# Script entry point: compare the two folders named on the command line.
if __name__ == "__main__":
    # Both folder arguments are required.
    if len(sys.argv) < 3:
        print("Usage: {0} folder1 folder2".format(sys.argv[0]))
        # Non-zero status so calling scripts can detect the usage error;
        # sys.exit is used because the bare exit() helper is only injected
        # by the site module.
        sys.exit(1)

    # Counters updated in place by CheckFolderPair.
    stats = {
        'missing-file2': 0,
        'missing-dir2': 0,
        'wrong-size': 0,
        'wrong-hash': 0,
    }

    CheckFolderPair(sys.argv[1], sys.argv[2], stats)

    # The per-entry progress line is printed before each entry is checked,
    # so the outcome of the last entry is only visible in this final report.
    print(stats)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment