Created
March 1, 2018 16:19
-
-
Save mu88/c07ced6cfdcb17c92f77957ffee0930a to your computer and use it in GitHub Desktop.
ArcPy - Compare two File Geodatabases
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import arcpy, logging, math, os, shutil, sys | |
def GetCharDigitSum(s): | |
if s is None: | |
return 0 | |
sum = 0 | |
for c in s: | |
sum = sum + ord(c) | |
return sum | |
def CompareSpatially(fcl, gdb1, gdb2): | |
logging.debug("Doing spatial comparison of FCL '" + fcl + "' between '" + gdb1 + "' and '" + gdb2 + "'") | |
epsilon = 0.00001 | |
# check if comparison FGDB exists and create it if not present | |
comparisonFGDBName = "Comparison.gdb" | |
comparisonFGDB = comparisonPath + "/" + comparisonFGDBName | |
if not os.path.exists(comparisonFGDB): | |
logging.debug("FGDB '" + comparisonFGDB + "' does not exist and will be created") | |
arcpy.CreateFileGDB_management(comparisonPath, comparisonFGDBName) | |
previousWorkspace = arcpy.env.workspace | |
arcpy.env.workspace = comparisonFGDB | |
# preparations for spatial join | |
fcl1 = gdb1 + "/" + fcl | |
fcl2 = gdb2 + "/" + fcl | |
joinFeatureClass = comparisonFGDB + "/" + fcl | |
join_operation = "JOIN_ONE_TO_ONE" | |
join_type = "KEEP_ALL" | |
match_option = "ARE_IDENTICAL_TO" | |
# in case the current FCL has been compared before, the former comparison will be discarded | |
if arcpy.Exists(joinFeatureClass): | |
logging.debug("FCL '" + fcl + "' does already exist and will be deleted from comparison FGDB") | |
arcpy.Delete_management(joinFeatureClass) | |
# do the spatial join | |
arcpy.SpatialJoin_analysis(fcl1, fcl2, joinFeatureClass, join_operation=join_operation, join_type=join_type, match_option=match_option) | |
logging.debug("Join FCL '" + joinFeatureClass + "' has been created") | |
# get the distinct fields (because of joining, some of them obtain the suffix "_1" or have a special meaning for ESRI) | |
# floating point numbers have to be treated in a special manner | |
fieldList = arcpy.ListFields(joinFeatureClass) | |
distinctFields = [] | |
distinctFloatingFields = [] | |
for field in fieldList: | |
fieldName = field.name | |
fieldType = field.type | |
if not fieldName.endswith("_1") and not fieldName.endswith("1") and fieldName not in ['OBJECTID', 'GEOMETRIE', 'TARGET_FID', 'Join_Count', 'Shape', 'Shape_Length', 'Shape_Area', 'SHAPE_Length', 'SHAPE_Area']: | |
if fieldType in ['Double', 'Single']: | |
distinctFloatingFields.append(field) | |
else: | |
distinctFields.append(field) | |
# build up the WHERE clause => for floating point numbers, a numerical comparison with a delta will be done, otherwise pure value comparison | |
selectView = fcl + "_select" | |
whereClause = '"Join_Count" <> 1' | |
for field in distinctFields: | |
fieldName = field.name | |
whereClause = whereClause + ' OR "' + fieldName + '" <> "' | |
# when doing a spatial join, there is a different suffix for columns ending with an underscore | |
# typical/normal naming: original column name + "_1" | |
# naming for columns ending with an underscore: original column name - underscore + "1" | |
if fieldName.endswith("_"): | |
whereClause = whereClause + fieldName[0:(len(fieldName) - 1)] + '1"' | |
else: | |
whereClause = whereClause + fieldName + '_1"' | |
for field in distinctFloatingFields: | |
fieldName = field.name | |
whereClause = whereClause + ' OR ABS("' + fieldName + '" - "' | |
# when doing a spatial join, there is a different suffix for columns ending with an underscore | |
# typical/normal naming: original column name + "_1" | |
# naming for columns ending with an underscore: original column name - underscore + "1" | |
if fieldName.endswith("_"): | |
whereClause = whereClause + fieldName[0:(len(fieldName) - 1)] + '1"' | |
else: | |
whereClause = whereClause + fieldName + '_1"' | |
whereClause = whereClause + ') > ' + str(epsilon) | |
# check whether there are not equal fields with the generated WHERE clause | |
logging.debug("Where clause: '" + whereClause + "'") | |
arcpy.MakeTableView_management(joinFeatureClass, selectView, whereClause) | |
count = int(arcpy.GetCount_management(selectView).getOutput(0)) | |
logMessage = "Number of not matching features: " + str(count) | |
arcpy.env.workspace = previousWorkspace | |
if count == 0: | |
logging.debug(logMessage) | |
return True | |
else: | |
logging.warn(logMessage) | |
# specific situation for project OEREB => URLs within a column of type String where mixed-up, but textual equal | |
# to handle this, it needs a detailed check treating each found feature | |
returnValue = True | |
innerAbort = False | |
rows = arcpy.SearchCursor(selectView) | |
for row in rows: | |
logging.debug("Doing detail check") | |
# do a delta comparison for the floating point numbers | |
for field in distinctFloatingFields: | |
fieldName = field.name | |
val1 = row.getValue(fieldName) | |
# when doing a spatial join, there is a different suffix for columns ending with an underscore | |
# typical/normal naming: original column name + "_1" | |
# naming for columns ending with an underscore: original column name - underscore + "1" | |
if fieldName.endswith("_"): | |
val2 = row.getValue(fieldName[0:(len(fieldName) - 1)] + '1') | |
else: | |
val2 = row.getValue(fieldName + "_1") | |
if abs(val1 - val2) > epsilon: | |
innerAbort = True | |
break | |
# compare the other fields in case no error has been encountered before | |
if innerAbort == False: | |
for field in distinctFields: | |
fieldName = field.name | |
val1 = row.getValue(fieldName) | |
# when doing a spatial join, there is a different suffix for columns ending with an underscore | |
# typical/normal naming: original column name + "_1" | |
# naming for columns ending with an underscore: original column name - underscore + "1" | |
if fieldName.endswith("_"): | |
val2 = row.getValue(fieldName[0:(len(fieldName) - 1)] + '1') | |
else: | |
val2 = row.getValue(fieldName + "_1") | |
if val1 <> val2: | |
# in case the field is of type String, the digit sum of the two values will compared | |
# => if they are equal, the fields are assumed equal as well | |
if field.type == "String": | |
s1 = GetCharDigitSum(val1) | |
s2 = GetCharDigitSum(val2) | |
if s1 <> s2: | |
logging.debug("CharDigitSum: " + str(s1) + " <> " + str(s2)) | |
innerAbort = True | |
break | |
else: | |
innerAbort = True | |
break | |
if innerAbort == True: | |
returnValue = False | |
break | |
del row, rows | |
return returnValue | |
def CompareOcls(ocls1, ocls2, mode): | |
ocls1NotInOcl2 = [] | |
ocls2NotInOcl1 = [] | |
commonOcls = [] | |
# OCL1 not in OCLS2 | |
for ocl1 in ocls1: | |
if ocl1 not in ocls2: | |
logging.warn(mode + " '" + ocl1 + "' is not in GDB2") | |
ocls1NotInOcl2.append(ocl1) | |
else: | |
logging.debug(mode + " '" + ocl1 + "' exists in both GDBs") | |
commonOcls.append(ocl1) | |
# OCL2 not in OCLS1 | |
for ocl2 in ocls2: | |
if ocl2 not in ocls1: | |
logging.warn(mode + " '" + ocl1 + "' is not in GDB1") | |
ocls2NotInOcl1.append(ocl2) | |
# compare the object classes that exist in both GDBs | |
oclsEqual = [] | |
oclsNotEqual = [] | |
for ocl in commonOcls: | |
ocl1 = gdb1 + "/" + ocl | |
ocl2 = gdb2 + "/" + ocl | |
sort_field = "OBJECTID" | |
compare_type = "ALL" | |
ignore_option = "" | |
xy_tolerance = "0.001 METERS" | |
m_tolerance = 0 | |
z_tolerance = 0 | |
attribute_tolerance = "" | |
omit_field = "" | |
continue_compare = "CONTINUE_COMPARE" | |
compare_file = comparisonPath + "/" + ocl + ".txt" | |
if mode == "FCL": | |
compare_result = arcpy.FeatureCompare_management(ocl1, ocl2, sort_field, compare_type, ignore_option, xy_tolerance, m_tolerance, z_tolerance, attribute_tolerance, omit_field, continue_compare, compare_file) | |
elif mode == "TBL": | |
compare_result = arcpy.TableCompare_management(ocl1, ocl2, sort_field, compare_type, ignore_option, attribute_tolerance, omit_field, continue_compare, compare_file) | |
else: | |
sys.exit("Invalid mode, leaving application") | |
if compare_result[1] == "true": | |
logging.debug(mode + " '" + ocl + "' is equal in both GDBs") | |
oclsEqual.append(ocl) | |
else: | |
spatialCompareResult = CompareSpatially(ocl, gdb1, gdb2) | |
if spatialCompareResult == True: | |
logging.debug(mode + " '" + ocl + "' is equal in both GDBs") | |
oclsEqual.append(ocl) | |
else: | |
logging.warn(mode + " '" + ocl + "' is NOT equal in both GDBs") | |
oclsNotEqual.append(ocl) | |
logging.info("################################################################") | |
logging.info("############### RESULT #########################") | |
logging.info("### Number of equal " + mode + "s: " + str(len(oclsEqual))) | |
logging.info("### Number of NOT equal " + mode + "s: " + str(len(oclsNotEqual))) | |
logging.info("### Number of " + mode + "s that exist in GDB1 but not in GDB2: " + str(len(ocls1NotInOcl2))) | |
logging.info("### Number of " + mode + "s that exist in GDB2 but not in GDB1: " + str(len(ocls2NotInOcl1))) | |
logging.info("################################################################") | |
def listFclsInGDB(gdb): | |
logging.debug("Provided GDB: '" + gdb + "'") | |
arcpy.env.workspace = gdb | |
fcls = [] | |
datasets = arcpy.ListDatasets('', 'All') | |
if datasets is not None: | |
for fds in datasets + ['']: | |
logging.debug("Iterating FDS: '" + fds + "'") | |
for fcl in arcpy.ListFeatureClasses('', '', fds): | |
logging.debug("Iterating FCL: '" + fcl + "'") | |
fcls.append(os.path.join(fds, fcl)) | |
else: | |
for fcl in arcpy.ListFeatureClasses(): | |
logging.debug("Iterating FCL: '" + fcl + "'") | |
fcls.append(fcl) | |
return fcls | |
def listTblsInGDB(gdb): | |
logging.debug("Provided GDB: '" + gdb + "'") | |
arcpy.env.workspace = gdb | |
tbls = [] | |
for fds in arcpy.ListDatasets('', 'All') + ['']: | |
logging.debug("Iterating FDS: '" + fds + "'") | |
for tbl in arcpy.ListTables('', fds): | |
logging.debug("Iterating Table: '" + tbl + "'") | |
tbls.append(os.path.join(fds, tbl)) | |
return tbls | |
os.system("cls") | |
gdb1 = sys.argv[1] | |
gdb2 = sys.argv[2] | |
comparisonPath = sys.argv[3] + "/Comparison" | |
loglevel = sys.argv[4] | |
# set LogLevel | |
numeric_level = getattr(logging, loglevel.upper(), None) | |
if not isinstance(numeric_level, int): | |
raise ValueError('Invalid log level: %s' % loglevel) | |
logging.basicConfig(level=numeric_level) | |
print "Logging in level '" + loglevel + "'" | |
# truncate the directory where the comparison files will be placed | |
if os.path.exists(comparisonPath): | |
if os.listdir(comparisonPath) != "": | |
output = "Directory '" + comparisonPath + "' is not empty. Truncate? [y, n] " | |
truncate = raw_input(output) | |
if truncate == 'y': | |
# looks weird, but look here: http://stackoverflow.com/questions/16373747/permission-denied-doing-os-mkdird-after-running-shutil-rmtreed-in-python | |
tempPath = comparisonPath + "2" | |
os.rename(comparisonPath, tempPath) | |
shutil.rmtree(tempPath) | |
os.makedirs(comparisonPath) | |
elif truncate == "n": | |
sys.exit("Good bye") | |
else: | |
sys.exit("Invalid argument, leaving application") | |
else: | |
os.makedirs(comparisonPath) | |
logging.debug("Directory '" + comparisonPath + "' has been created") | |
# get all feature classes and tables of the GDB | |
fcls1 = listFclsInGDB(gdb1) | |
fcls2 = listFclsInGDB(gdb2) | |
tbls1 = listTblsInGDB(gdb1) | |
tbls2 = listTblsInGDB(gdb2) | |
if len(fcls1) > 0 or len(fcl2) > 0: | |
CompareOcls(fcls1, fcls2, "FCL") | |
if len(tbls1) > 0 or len(tbls2) > 0: | |
CompareOcls(tbls1, tbls2, "TBL") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment