mu88 · March 1, 2018 16:19
diff --git a/Compare.py b/Compare.py
 import arcpy, logging, math, os, shutil, sys

 def GetCharDigitSum(s):
    if s is None:
        return 0
    
    sum = 0
    for c in s:
        sum = sum + ord(c)

    return sum

 def CompareSpatially(fcl, gdb1, gdb2):
    logging.debug("Doing spatial comparison of FCL '" + fcl + "' between '" + gdb1 + "' and '" + gdb2 + "'")

    epsilon = 0.00001

    # check if comparison FGDB exists and create it if not present    
    comparisonFGDBName = "Comparison.gdb"
    comparisonFGDB = comparisonPath + "/" + comparisonFGDBName
    if not os.path.exists(comparisonFGDB):
        logging.debug("FGDB '" + comparisonFGDB + "' does not exist and will be created")
        arcpy.CreateFileGDB_management(comparisonPath, comparisonFGDBName)
    
    previousWorkspace = arcpy.env.workspace
    arcpy.env.workspace = comparisonFGDB

    # preparations for spatial join
    fcl1 = gdb1 + "/" + fcl
    fcl2 = gdb2 + "/" + fcl
    joinFeatureClass = comparisonFGDB + "/" + fcl
    join_operation = "JOIN_ONE_TO_ONE"
    join_type = "KEEP_ALL"
    match_option = "ARE_IDENTICAL_TO"

    # in case the current FCL has been compared before, the former comparison will be discarded
    if arcpy.Exists(joinFeatureClass):
        logging.debug("FCL '" + fcl + "' does already exist and will be deleted from comparison FGDB")
        arcpy.Delete_management(joinFeatureClass)
    
    # do the spatial join
    arcpy.SpatialJoin_analysis(fcl1, fcl2, joinFeatureClass, join_operation=join_operation, join_type=join_type, match_option=match_option)
    logging.debug("Join FCL '" + joinFeatureClass + "' has been created")

    # get the distinct fields (because of joining, some of them obtain the suffix "_1" or have a special meaning for ESRI)
    # floating point numbers have to be treated in a special manner
    fieldList = arcpy.ListFields(joinFeatureClass)
    distinctFields = []
    distinctFloatingFields = []
    for field in fieldList:
        fieldName = field.name
        fieldType = field.type
        if not fieldName.endswith("_1") and not fieldName.endswith("1") and fieldName not in ['OBJECTID', 'GEOMETRIE', 'TARGET_FID', 'Join_Count', 'Shape', 'Shape_Length', 'Shape_Area', 'SHAPE_Length', 'SHAPE_Area']:
            if fieldType in ['Double', 'Single']:
                distinctFloatingFields.append(field)
            else:
                distinctFields.append(field)

    # build up the WHERE clause => for floating point numbers, a numerical comparison with a delta will be done, otherwise pure value comparison
    selectView = fcl + "_select"
    whereClause = '"Join_Count" <> 1'
    for field in distinctFields:
        fieldName = field.name
        whereClause = whereClause + ' OR "' + fieldName + '" <> "'
        
        # when doing a spatial join, there is a different suffix for columns ending with an underscore
        # typical/normal naming: original column name + "_1"
        # naming for columns ending with an underscore: original column name - underscore + "1"
        if fieldName.endswith("_"):
            whereClause = whereClause + fieldName[0:(len(fieldName) - 1)] + '1"'
        else:
            whereClause = whereClause + fieldName + '_1"'
    for field in distinctFloatingFields:
        fieldName = field.name
        whereClause = whereClause + ' OR ABS("' + fieldName + '" - "'
        
        # when doing a spatial join, there is a different suffix for columns ending with an underscore
        # typical/normal naming: original column name + "_1"
        # naming for columns ending with an underscore: original column name - underscore + "1"
        if fieldName.endswith("_"):
            whereClause = whereClause + fieldName[0:(len(fieldName) - 1)] + '1"'
        else:
            whereClause = whereClause + fieldName + '_1"'

        whereClause = whereClause + ') > ' + str(epsilon)

    # check whether there are not equal fields with the generated WHERE clause
    logging.debug("Where clause: '" + whereClause + "'")
    arcpy.MakeTableView_management(joinFeatureClass, selectView, whereClause)
    count = int(arcpy.GetCount_management(selectView).getOutput(0))
    logMessage = "Number of not matching features: " + str(count)
    
    arcpy.env.workspace = previousWorkspace
    
    if count == 0:
        logging.debug(logMessage)

        return True
    else:
        logging.warn(logMessage)

        # specific situation for project OEREB => URLs within a column of type String where mixed-up, but textual equal
        # to handle this, it needs a detailed check treating each found feature
        returnValue = True
        innerAbort = False
        rows = arcpy.SearchCursor(selectView)
        for row in rows:
            logging.debug("Doing detail check")

            # do a delta comparison for the floating point numbers
            for field in distinctFloatingFields:
                fieldName = field.name
                val1 = row.getValue(fieldName)

                # when doing a spatial join, there is a different suffix for columns ending with an underscore
                # typical/normal naming: original column name + "_1"
                # naming for columns ending with an underscore: original column name - underscore + "1"
                if fieldName.endswith("_"):
                    val2 = row.getValue(fieldName[0:(len(fieldName) - 1)] + '1')
                else:
                    val2 = row.getValue(fieldName + "_1")
                
                if abs(val1 - val2) > epsilon:
                    innerAbort = True
                    break

            # compare the other fields in case no error has been encountered before
            if innerAbort == False:
                for field in distinctFields:
                    fieldName = field.name
                    val1 = row.getValue(fieldName)

                    # when doing a spatial join, there is a different suffix for columns ending with an underscore
                    # typical/normal naming: original column name + "_1"
                    # naming for columns ending with an underscore: original column name - underscore + "1"
                    if fieldName.endswith("_"):
                        val2 = row.getValue(fieldName[0:(len(fieldName) - 1)] + '1')
                    else:
                        val2 = row.getValue(fieldName + "_1")

                    if val1 <> val2:
                        # in case the field is of type String, the digit sum of the two values will compared
                        # => if they are equal, the fields are assumed equal as well
                        if field.type == "String":
                            s1 = GetCharDigitSum(val1)
                            s2 = GetCharDigitSum(val2)
                            if s1 <> s2:
                                logging.debug("CharDigitSum: " + str(s1) + " <> " + str(s2))
                                innerAbort = True
                                break
                        else:
                            innerAbort = True
                            break

            if innerAbort == True:
                returnValue = False
                break
            
        del row, rows
        
        return returnValue

 def CompareOcls(ocls1, ocls2, mode):
    ocls1NotInOcl2 = []
    ocls2NotInOcl1 = []
    commonOcls = []
    # OCL1 not in OCLS2
    for ocl1 in ocls1:
        if ocl1 not in ocls2:
            logging.warn(mode + " '" + ocl1 + "' is not in GDB2")
            ocls1NotInOcl2.append(ocl1)
        else:
            logging.debug(mode + " '" + ocl1 + "' exists in both GDBs")
            commonOcls.append(ocl1)

    # OCL2 not in OCLS1
    for ocl2 in ocls2:
        if ocl2 not in ocls1:
            logging.warn(mode + " '" + ocl1 + "' is not in GDB1")
            ocls2NotInOcl1.append(ocl2)

    # compare the object classes that exist in both GDBs
    oclsEqual = []
    oclsNotEqual = []

    for ocl in commonOcls:
        ocl1 = gdb1 + "/" + ocl
        ocl2 = gdb2 + "/" + ocl
        sort_field = "OBJECTID"
        compare_type = "ALL"
        ignore_option = ""
        xy_tolerance = "0.001 METERS"
        m_tolerance = 0
        z_tolerance = 0
        attribute_tolerance = ""
        omit_field = ""
        continue_compare = "CONTINUE_COMPARE"
        compare_file = comparisonPath + "/" + ocl + ".txt"

        if mode == "FCL":
            compare_result = arcpy.FeatureCompare_management(ocl1, ocl2, sort_field, compare_type, ignore_option, xy_tolerance, m_tolerance, z_tolerance, attribute_tolerance, omit_field, continue_compare, compare_file)
        elif mode == "TBL":
            compare_result = arcpy.TableCompare_management(ocl1, ocl2, sort_field, compare_type, ignore_option, attribute_tolerance, omit_field, continue_compare, compare_file)
        else:
            sys.exit("Invalid mode, leaving application")

        if compare_result[1] == "true":
            logging.debug(mode + " '" + ocl + "' is equal in both GDBs")
            oclsEqual.append(ocl)
        else:
            spatialCompareResult = CompareSpatially(ocl, gdb1, gdb2)
            if spatialCompareResult == True:
                logging.debug(mode + " '" + ocl + "' is equal in both GDBs")
                oclsEqual.append(ocl)
            else:
                logging.warn(mode + " '" + ocl + "' is NOT equal in both GDBs")
                oclsNotEqual.append(ocl)

    logging.info("################################################################")
    logging.info("###############          RESULT        #########################")
    logging.info("###   Number of equal " + mode + "s: " + str(len(oclsEqual)))
    logging.info("###   Number of NOT equal " + mode + "s: " + str(len(oclsNotEqual)))
    logging.info("###   Number of " + mode + "s that exist in GDB1 but not in GDB2: " + str(len(ocls1NotInOcl2)))
    logging.info("###   Number of " + mode + "s that exist in GDB2 but not in GDB1: " + str(len(ocls2NotInOcl1)))
    logging.info("################################################################")

 def listFclsInGDB(gdb):
    logging.debug("Provided GDB: '" + gdb + "'")
    arcpy.env.workspace = gdb
    
    fcls = []
    datasets = arcpy.ListDatasets('', 'All')

    if datasets is not None:
        for fds in datasets + ['']:
            logging.debug("Iterating FDS: '" + fds + "'")
            for fcl in arcpy.ListFeatureClasses('', '', fds):
                logging.debug("Iterating FCL: '" + fcl + "'")
                fcls.append(os.path.join(fds, fcl))
    else:
        for fcl in arcpy.ListFeatureClasses():
            logging.debug("Iterating FCL: '" + fcl + "'")
            fcls.append(fcl)

    return fcls

 def listTblsInGDB(gdb):
    logging.debug("Provided GDB: '" + gdb + "'")
    arcpy.env.workspace = gdb
    
    tbls = []
    for fds in arcpy.ListDatasets('', 'All') + ['']:
        logging.debug("Iterating FDS: '" + fds + "'")
        for tbl in arcpy.ListTables('', fds):
            logging.debug("Iterating Table: '" + tbl + "'")
            tbls.append(os.path.join(fds, tbl))

    return tbls

 os.system("cls")

 gdb1 = sys.argv[1]
 gdb2 = sys.argv[2]
 comparisonPath = sys.argv[3] + "/Comparison"
 loglevel = sys.argv[4]

 # set LogLevel
 numeric_level = getattr(logging, loglevel.upper(), None)
 if not isinstance(numeric_level, int):
    raise ValueError('Invalid log level: %s' % loglevel)
 logging.basicConfig(level=numeric_level)

 print "Logging in level '" + loglevel + "'"

 # truncate the directory where the comparison files will be placed
 if os.path.exists(comparisonPath):
    if os.listdir(comparisonPath) != "":
        output = "Directory '" + comparisonPath + "' is not empty. Truncate? [y, n] "
        truncate = raw_input(output)
        if truncate == 'y':
            # looks weird, but look here: http://stackoverflow.com/questions/16373747/permission-denied-doing-os-mkdird-after-running-shutil-rmtreed-in-python
            tempPath = comparisonPath + "2"
            os.rename(comparisonPath, tempPath)
            shutil.rmtree(tempPath)
            os.makedirs(comparisonPath)
        elif truncate == "n":
            sys.exit("Good bye")
        else:
            sys.exit("Invalid argument, leaving application")
 else:
    os.makedirs(comparisonPath)
    logging.debug("Directory '" + comparisonPath + "' has been created")

 # get all feature classes and tables of the GDB
 fcls1 = listFclsInGDB(gdb1)
 fcls2 = listFclsInGDB(gdb2)
 tbls1 = listTblsInGDB(gdb1)
 tbls2 = listTblsInGDB(gdb2)

 if len(fcls1) > 0 or len(fcl2) > 0:
    CompareOcls(fcls1, fcls2, "FCL")

 if len(tbls1) > 0 or len(tbls2) > 0:
    CompareOcls(tbls1, tbls2, "TBL")
	import arcpy, logging, math, os, shutil, sys

	def GetCharDigitSum(s):
	if s is None:
	return 0

	sum = 0
	for c in s:
	sum = sum + ord(c)

	return sum

	def CompareSpatially(fcl, gdb1, gdb2):
	logging.debug("Doing spatial comparison of FCL '" + fcl + "' between '" + gdb1 + "' and '" + gdb2 + "'")

	epsilon = 0.00001

	# check if comparison FGDB exists and create it if not present
	comparisonFGDBName = "Comparison.gdb"
	comparisonFGDB = comparisonPath + "/" + comparisonFGDBName
	if not os.path.exists(comparisonFGDB):
	logging.debug("FGDB '" + comparisonFGDB + "' does not exist and will be created")
	arcpy.CreateFileGDB_management(comparisonPath, comparisonFGDBName)

	previousWorkspace = arcpy.env.workspace
	arcpy.env.workspace = comparisonFGDB

	# preparations for spatial join
	fcl1 = gdb1 + "/" + fcl
	fcl2 = gdb2 + "/" + fcl
	joinFeatureClass = comparisonFGDB + "/" + fcl
	join_operation = "JOIN_ONE_TO_ONE"
	join_type = "KEEP_ALL"
	match_option = "ARE_IDENTICAL_TO"

	# in case the current FCL has been compared before, the former comparison will be discarded
	if arcpy.Exists(joinFeatureClass):
	logging.debug("FCL '" + fcl + "' does already exist and will be deleted from comparison FGDB")
	arcpy.Delete_management(joinFeatureClass)

	# do the spatial join
	arcpy.SpatialJoin_analysis(fcl1, fcl2, joinFeatureClass, join_operation=join_operation, join_type=join_type, match_option=match_option)
	logging.debug("Join FCL '" + joinFeatureClass + "' has been created")

	# get the distinct fields (because of joining, some of them obtain the suffix "_1" or have a special meaning for ESRI)
	# floating point numbers have to be treated in a special manner
	fieldList = arcpy.ListFields(joinFeatureClass)
	distinctFields = []
	distinctFloatingFields = []
	for field in fieldList:
	fieldName = field.name
	fieldType = field.type
	if not fieldName.endswith("_1") and not fieldName.endswith("1") and fieldName not in ['OBJECTID', 'GEOMETRIE', 'TARGET_FID', 'Join_Count', 'Shape', 'Shape_Length', 'Shape_Area', 'SHAPE_Length', 'SHAPE_Area']:
	if fieldType in ['Double', 'Single']:
	distinctFloatingFields.append(field)
	else:
	distinctFields.append(field)

	# build up the WHERE clause => for floating point numbers, a numerical comparison with a delta will be done, otherwise pure value comparison
	selectView = fcl + "_select"
	whereClause = '"Join_Count" <> 1'
	for field in distinctFields:
	fieldName = field.name
	whereClause = whereClause + ' OR "' + fieldName + '" <> "'

	# when doing a spatial join, there is a different suffix for columns ending with an underscore
	# typical/normal naming: original column name + "_1"
	# naming for columns ending with an underscore: original column name - underscore + "1"
	if fieldName.endswith("_"):
	whereClause = whereClause + fieldName[0:(len(fieldName) - 1)] + '1"'
	else:
	whereClause = whereClause + fieldName + '_1"'
	for field in distinctFloatingFields:
	fieldName = field.name
	whereClause = whereClause + ' OR ABS("' + fieldName + '" - "'

	# when doing a spatial join, there is a different suffix for columns ending with an underscore
	# typical/normal naming: original column name + "_1"
	# naming for columns ending with an underscore: original column name - underscore + "1"
	if fieldName.endswith("_"):
	whereClause = whereClause + fieldName[0:(len(fieldName) - 1)] + '1"'
	else:
	whereClause = whereClause + fieldName + '_1"'

	whereClause = whereClause + ') > ' + str(epsilon)

	# check whether there are not equal fields with the generated WHERE clause
	logging.debug("Where clause: '" + whereClause + "'")
	arcpy.MakeTableView_management(joinFeatureClass, selectView, whereClause)
	count = int(arcpy.GetCount_management(selectView).getOutput(0))
	logMessage = "Number of not matching features: " + str(count)

	arcpy.env.workspace = previousWorkspace

	if count == 0:
	logging.debug(logMessage)

	return True
	else:
	logging.warn(logMessage)

	# specific situation for project OEREB => URLs within a column of type String where mixed-up, but textual equal
	# to handle this, it needs a detailed check treating each found feature
	returnValue = True
	innerAbort = False
	rows = arcpy.SearchCursor(selectView)
	for row in rows:
	logging.debug("Doing detail check")

	# do a delta comparison for the floating point numbers
	for field in distinctFloatingFields:
	fieldName = field.name
	val1 = row.getValue(fieldName)

	# when doing a spatial join, there is a different suffix for columns ending with an underscore
	# typical/normal naming: original column name + "_1"
	# naming for columns ending with an underscore: original column name - underscore + "1"
	if fieldName.endswith("_"):
	val2 = row.getValue(fieldName[0:(len(fieldName) - 1)] + '1')
	else:
	val2 = row.getValue(fieldName + "_1")

	if abs(val1 - val2) > epsilon:
	innerAbort = True
	break

	# compare the other fields in case no error has been encountered before
	if innerAbort == False:
	for field in distinctFields:
	fieldName = field.name
	val1 = row.getValue(fieldName)

	# when doing a spatial join, there is a different suffix for columns ending with an underscore
	# typical/normal naming: original column name + "_1"
	# naming for columns ending with an underscore: original column name - underscore + "1"
	if fieldName.endswith("_"):
	val2 = row.getValue(fieldName[0:(len(fieldName) - 1)] + '1')
	else:
	val2 = row.getValue(fieldName + "_1")

	if val1 <> val2:
	# in case the field is of type String, the digit sum of the two values will compared
	# => if they are equal, the fields are assumed equal as well
	if field.type == "String":
	s1 = GetCharDigitSum(val1)
	s2 = GetCharDigitSum(val2)
	if s1 <> s2:
	logging.debug("CharDigitSum: " + str(s1) + " <> " + str(s2))
	innerAbort = True
	break
	else:
	innerAbort = True
	break

	if innerAbort == True:
	returnValue = False
	break

	del row, rows

	return returnValue

	def CompareOcls(ocls1, ocls2, mode):
	ocls1NotInOcl2 = []
	ocls2NotInOcl1 = []
	commonOcls = []
	# OCL1 not in OCLS2
	for ocl1 in ocls1:
	if ocl1 not in ocls2:
	logging.warn(mode + " '" + ocl1 + "' is not in GDB2")
	ocls1NotInOcl2.append(ocl1)
	else:
	logging.debug(mode + " '" + ocl1 + "' exists in both GDBs")
	commonOcls.append(ocl1)

	# OCL2 not in OCLS1
	for ocl2 in ocls2:
	if ocl2 not in ocls1:
	logging.warn(mode + " '" + ocl1 + "' is not in GDB1")
	ocls2NotInOcl1.append(ocl2)

	# compare the object classes that exist in both GDBs
	oclsEqual = []
	oclsNotEqual = []

	for ocl in commonOcls:
	ocl1 = gdb1 + "/" + ocl
	ocl2 = gdb2 + "/" + ocl
	sort_field = "OBJECTID"
	compare_type = "ALL"
	ignore_option = ""
	xy_tolerance = "0.001 METERS"
	m_tolerance = 0
	z_tolerance = 0
	attribute_tolerance = ""
	omit_field = ""
	continue_compare = "CONTINUE_COMPARE"
	compare_file = comparisonPath + "/" + ocl + ".txt"

	if mode == "FCL":
	compare_result = arcpy.FeatureCompare_management(ocl1, ocl2, sort_field, compare_type, ignore_option, xy_tolerance, m_tolerance, z_tolerance, attribute_tolerance, omit_field, continue_compare, compare_file)
	elif mode == "TBL":
	compare_result = arcpy.TableCompare_management(ocl1, ocl2, sort_field, compare_type, ignore_option, attribute_tolerance, omit_field, continue_compare, compare_file)
	else:
	sys.exit("Invalid mode, leaving application")

	if compare_result[1] == "true":
	logging.debug(mode + " '" + ocl + "' is equal in both GDBs")
	oclsEqual.append(ocl)
	else:
	spatialCompareResult = CompareSpatially(ocl, gdb1, gdb2)
	if spatialCompareResult == True:
	logging.debug(mode + " '" + ocl + "' is equal in both GDBs")
	oclsEqual.append(ocl)
	else:
	logging.warn(mode + " '" + ocl + "' is NOT equal in both GDBs")
	oclsNotEqual.append(ocl)

	logging.info("################################################################")
	logging.info("############### RESULT #########################")
	logging.info("### Number of equal " + mode + "s: " + str(len(oclsEqual)))
	logging.info("### Number of NOT equal " + mode + "s: " + str(len(oclsNotEqual)))
	logging.info("### Number of " + mode + "s that exist in GDB1 but not in GDB2: " + str(len(ocls1NotInOcl2)))
	logging.info("### Number of " + mode + "s that exist in GDB2 but not in GDB1: " + str(len(ocls2NotInOcl1)))
	logging.info("################################################################")

	def listFclsInGDB(gdb):
	logging.debug("Provided GDB: '" + gdb + "'")
	arcpy.env.workspace = gdb

	fcls = []
	datasets = arcpy.ListDatasets('', 'All')

	if datasets is not None:
	for fds in datasets + ['']:
	logging.debug("Iterating FDS: '" + fds + "'")
	for fcl in arcpy.ListFeatureClasses('', '', fds):
	logging.debug("Iterating FCL: '" + fcl + "'")
	fcls.append(os.path.join(fds, fcl))
	else:
	for fcl in arcpy.ListFeatureClasses():
	logging.debug("Iterating FCL: '" + fcl + "'")
	fcls.append(fcl)

	return fcls

	def listTblsInGDB(gdb):
	logging.debug("Provided GDB: '" + gdb + "'")
	arcpy.env.workspace = gdb

	tbls = []
	for fds in arcpy.ListDatasets('', 'All') + ['']:
	logging.debug("Iterating FDS: '" + fds + "'")
	for tbl in arcpy.ListTables('', fds):
	logging.debug("Iterating Table: '" + tbl + "'")
	tbls.append(os.path.join(fds, tbl))

	return tbls

	os.system("cls")

	gdb1 = sys.argv[1]
	gdb2 = sys.argv[2]
	comparisonPath = sys.argv[3] + "/Comparison"
	loglevel = sys.argv[4]

	# set LogLevel
	numeric_level = getattr(logging, loglevel.upper(), None)
	if not isinstance(numeric_level, int):
	raise ValueError('Invalid log level: %s' % loglevel)
	logging.basicConfig(level=numeric_level)

	print "Logging in level '" + loglevel + "'"

	# truncate the directory where the comparison files will be placed
	if os.path.exists(comparisonPath):
	if os.listdir(comparisonPath) != "":
	output = "Directory '" + comparisonPath + "' is not empty. Truncate? [y, n] "
	truncate = raw_input(output)
	if truncate == 'y':
	# looks weird, but look here: http://stackoverflow.com/questions/16373747/permission-denied-doing-os-mkdird-after-running-shutil-rmtreed-in-python
	tempPath = comparisonPath + "2"
	os.rename(comparisonPath, tempPath)
	shutil.rmtree(tempPath)
	os.makedirs(comparisonPath)
	elif truncate == "n":
	sys.exit("Good bye")
	else:
	sys.exit("Invalid argument, leaving application")
	else:
	os.makedirs(comparisonPath)
	logging.debug("Directory '" + comparisonPath + "' has been created")

	# get all feature classes and tables of the GDB
	fcls1 = listFclsInGDB(gdb1)
	fcls2 = listFclsInGDB(gdb2)
	tbls1 = listTblsInGDB(gdb1)
	tbls2 = listTblsInGDB(gdb2)

	if len(fcls1) > 0 or len(fcl2) > 0:
	CompareOcls(fcls1, fcls2, "FCL")

	if len(tbls1) > 0 or len(tbls2) > 0:
	CompareOcls(tbls1, tbls2, "TBL")