Last active
September 24, 2019 16:54
-
-
Save alexfriant/b036f93b0826aaed90fa to your computer and use it in GitHub Desktop.
Gives you a data report for the contents of an attribute table in arcgis shapefiles or geodatabase feature classes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################# | |
# | |
# Requirements: You'll need ArcGIS Desktop 10.1 or higher with Python 2.7+ | |
# | |
# Give this script a shapefile or a feature class and it will give you a summary | |
# report for each field (column) in the attribute table. It helps you get a feel | |
# for how "complete" the attributes are populated during data assessment. It's | |
# useful if you need to compare two similar data sources too if you fiddle with | |
# the results in a spreadsheet program. | |
# | |
# The output looks like this | |
# | |
# name datatype percent_pop count_pop count_empty | |
# APN String 100% 65332 0 | |
# Prop_Value Double 78% 50959 14373 | |
# ZipCode String 6% 3919 61413 | |
# | |
# Feel free to use, rewrite, and distribute as you wish. | |
# | |
################################################################################# | |
import arcpy | |
#argument inputs | |
#if you would rather just manually enter your 2 input variables, do so here | |
#designate your geodatabase table, feature class or shapefile here | |
#in_table = 'c:/localdata/sandbox/mytables.gdb/fc1' | |
#designate the folder you would like your output CSV file to written to here | |
#out_path = 'c:/localdata/sandbox' | |
#if you use this as a script in a GP toolbox, use these variables below instead | |
#and make sure to include these as parameters in your script's properties | |
in_table = arcpy.GetParameterAsText(0) | |
out_path = arcpy.GetParameterAsText(1) | |
#search exclusions | |
#define the fields you aren't interested in analyzing data values for | |
excluded_fields = ['objectid','fid','globalid','shape', | |
'shape_area','shape.area', | |
'shape_length','shape.len','shape_len'] | |
#define what's considered "empty" data | |
empty_values = [None,'None',''] | |
#set the CSV report column headers here | |
## if you modify these, then you need to modify the value outputs near the end of the | |
## main field iteration loop below | |
report = 'name,alias,datatype,percent_pop,count_pop,count_empty\n' | |
#derive output filename | |
desc_fc = arcpy.Describe(in_table) | |
out_name = '/' + desc_fc.name + '.csv' | |
out_file = out_path + out_name | |
#get field list | |
fields = arcpy.ListFields(in_table) | |
## go through each field in the attribute table | |
for field in fields: | |
if field.name.lower() not in excluded_fields: | |
message = 'analyzing ' + field.name + '...' | |
print message | |
arcpy.AddMessage( message ) | |
cursor = arcpy.SearchCursor(in_table, None, None, field.name) | |
empty_count = 0 | |
populated_count = 0 | |
## go through each row in the attribute table for this particular field | |
for row in cursor: | |
data = row.getValue(field.name) | |
try: | |
if str(data).strip() in empty_values: | |
empty_count += 1 | |
else: | |
populated_count += 1 | |
except: | |
if data.encode('utf-8').strip() in empty_values: | |
empty_count += 1 | |
else: | |
populated_count += 1 | |
if populated_count + empty_count == 0: | |
percent_pop = 0 | |
else: | |
percent_pop = float(populated_count) / (populated_count + empty_count) * 100 | |
#write output to our temporary report string | |
## if you modify these output values, then you must modify the CSV report | |
## headers above where 'report' is first defined | |
report += field.name + ',' + \ | |
field.aliasName + ',' + \ | |
field.type + ',' + \ | |
'%3.2f%%' % percent_pop + ',' + \ | |
str(populated_count) + ',' + \ | |
str(empty_count) + '\n' | |
#if you run it in IDLE or PyWin, you'll see some logging from here | |
print report | |
#if you run it as a GP script, you'll see some logging from here | |
arcpy.AddMessage( report ) | |
##write the report to output file | |
f = open(out_file, 'w') | |
f.write(report) | |
f.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment