Skip to content

Instantly share code, notes, and snippets.

@AndiH
Created December 14, 2015 11:53
Show Gist options
  • Save AndiH/d7a097f3e1c57e21310f to your computer and use it in GitHub Desktop.
Save AndiH/d7a097f3e1c57e21310f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import argparse
import filecmp
import subprocess
def compareFilecmp(a, b):
    """Return True if the two files have identical content.

    Parameters:
        a, b: Open file objects (anything exposing a ``name`` attribute that
            holds a filesystem path). Only ``name`` is used; the handles are
            not read, so their current positions are left untouched.

    Returns:
        bool: True when the file contents are identical, False otherwise.
    """
    # See https://docs.python.org/3/library/filecmp.html
    # shallow=False forces an actual content comparison. With the default
    # (shallow=True) filecmp declares two files equal as soon as their
    # os.stat() signatures (file type, size, modification time) match,
    # without ever looking at the data.
    return filecmp.cmp(a.name, b.name, shallow=False)
def compareFileLengths(a, b):
    """Return True if both files contain the same number of lines.

    The lines are counted in Python rather than by shelling out to ``wc -l``.
    This works on platforms without ``wc`` and also counts a final line that
    lacks a trailing newline (``wc -l`` only counts newline characters), so
    the result matches the number of lines seen when iterating over a file.

    Parameters:
        a, b: Open file objects (anything exposing a ``name`` attribute that
            holds a filesystem path). Each file is re-opened by name, so the
            read positions of the passed-in handles are not disturbed.

    Returns:
        bool: True when the line counts are equal, False otherwise.
    """
    def countLines(path):
        # Binary mode: lines are split on b"\n" and no decoding is attempted.
        with open(path, "rb") as handle:
            return sum(1 for _ in handle)

    return countLines(a.name) == countLines(b.name)
def compareFilesAbsDiff(a, b, tolerance=1e-8):
    """Compare two whitespace-separated numeric files element by element.

    Both inputs are walked in lockstep, line by line, starting from their
    current read positions. For each pair of lines the absolute differences
    of the paired elements are summed; a line counts as different when that
    sum exceeds ``tolerance`` or when the two lines do not hold the same
    number of elements. A message is printed for every differing line.

    Parameters:
        a, b: Iterables yielding text lines (e.g. open text files).
        tolerance: Largest accumulated absolute difference per line that is
            still treated as equal. Defaults to 1e-8.

    Returns:
        tuple: ``(isequal, absdiff)`` where ``isequal`` is a bool and
        ``absdiff`` is a list with the accumulated absolute difference of
        each compared line (for lines of unequal width, the sum over the
        elements both lines share).

    Raises:
        ValueError: If an element cannot be converted with ``float()``.

    Note:
        Pairing stops at the end of the shorter input, so callers should
        check beforehand that both inputs have the same number of lines.
    """
    isequal = True
    absdiff = []
    for index, (aLine, bLine) in enumerate(zip(a, b)):
        aElems = aLine.split()
        bElems = bLine.split()
        # zip() below silently drops surplus elements, so a width mismatch
        # has to be detected explicitly or it would go unnoticed.
        if len(aElems) != len(bElems):
            print("Line " + repr(index) + " has " + repr(len(aElems)) + " elements in the first file but " + repr(len(bElems)) + " in the second!")
            isequal = False
        localAbsdiff = 0
        for (aElem, bElem) in zip(aElems, bElems):
            localAbsdiff += abs(float(aElem) - float(bElem))
        if localAbsdiff > tolerance:
            print("There's a total difference of " + repr(localAbsdiff) + " in line " + repr(index) + "!")
            isequal = False
        absdiff.append(localAbsdiff)
    return isequal, absdiff
def main(refFileName = "reference.dat", newFileName = "new.dat"):
    """Compare two files for equality and print the verdict.

    Three checks are applied to the new file against the reference file:

    1) ``compareFilecmp``: comparison via Python's ``filecmp`` module.
    2) ``compareFileLengths``: both files must have the same number of lines.
    3) ``compareFilesAbsDiff``: for matrix-like input. Walks the files line
       by line and compares the whitespace-separated floating point elements
       of each line by absolute difference; the files pass if every line's
       accumulated difference stays below a tiny value. This check is only
       run when check 2 succeeded.

    The files are reported as the same only if all three checks pass.
    Diagnostics and the final verdict are printed to standard output.

    Parameters:
        refFileName: Path of the reference file.
        newFileName: Path of the file to compare against the reference.

    -Andreas Herten, 14 Dec 2015
    """
    # Context managers make sure both handles are closed again, even if one
    # of the comparisons raises.
    with open(refFileName, "r") as refFile, open(newFileName, "r") as newFile:
        # First, use Python's filecmp to compare
        boolFilecmp = compareFilecmp(refFile, newFile)
        if not boolFilecmp:
            print("The two files " + repr(refFile.name) + " and " + repr(newFile.name) + " are not equal.")

        # Compare file lengths
        boolFileLengths = compareFileLengths(refFile, newFile)
        if not boolFileLengths:
            print("The two files " + repr(refFile.name) + " and " + repr(newFile.name) + " are not of equal length!")

        # In-depth compare of all elements. Skipped (and counted as failed)
        # when the line counts differ.
        boolAbsDiff = False
        if boolFileLengths:
            boolAbsDiff, lineDiffs = compareFilesAbsDiff(refFile, newFile)
            if not boolAbsDiff:
                print("The elements of the two files " + repr(refFile.name) + " and " + repr(newFile.name) + " are different! " + "The greatest accumulated total difference in a line is " + repr(max(lineDiffs)))

    if boolFilecmp and boolFileLengths and boolAbsDiff:
        print("The files appear to be the same!")
    else:
        print("The files appear to be different!")
if __name__ == '__main__':
    # Command-line entry point: both positional arguments are optional and
    # fall back to the default file names.
    cli = argparse.ArgumentParser(description="Compare two files for equality")
    positionals = (
        ("reference_file", "The reference file to compare the new file against", "reference.dat"),
        ("new_file", "The new file", "new.dat"),
    )
    for argName, helpText, fallback in positionals:
        cli.add_argument(argName, type=str, nargs='?', help=helpText, default=fallback)
    options = cli.parse_args()
    main(options.reference_file, options.new_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment