Skip to content

Instantly share code, notes, and snippets.

@MarcAlx
Last active October 2, 2025 08:12
Show Gist options
  • Save MarcAlx/4cec595ec20c945a6d9191cf5bea5ce6 to your computer and use it in GitHub Desktop.
Save MarcAlx/4cec595ec20c945a6d9191cf5bea5ce6 to your computer and use it in GitHub Desktop.
ArcPy Feature Class anonymizer
# script to anonymize attributes and geometry of a feature class
#
# /!\ String anonymization is light and based on MD5; use it only for visual hiding, not for security!
# /!\ Assumes spatial references are in meters, like 102110/2154
#
# To run:
# & 'C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\python.exe' .\arcgis-anonymizer.py
import arcpy
import math
import random
import hashlib
def create_circle_from_point(point, radius, num_sides=64, spatial_ref=None):
    """
    build a polygon approximating a circle around a center point

    point: center of the circle, either an (x, y) tuple or an arcpy.Point
    radius: circle radius, added as-is to the center coordinates
            (so it is expressed in the units of those coordinates)
    num_sides: number of vertices used to approximate the circle (at least 3)
    spatial_ref: arcpy.SpatialReference given to the resulting polygon,
                 defaults to WGS84 (4326) when omitted

    returns an arcpy.Polygon whose ring is explicitly closed
    raises ValueError if num_sides is below 3 or point has an unsupported type
    """
    # validate first: 0 would divide by zero below, 1 or 2 would silently
    # produce a degenerate (zero-area) ring
    if num_sides < 3:
        raise ValueError(f"num_sides must be at least 3, got {num_sides!r}")

    if isinstance(point, tuple):
        center = arcpy.Point(point[0], point[1])
    elif isinstance(point, arcpy.Point):
        center = point
    else:
        raise ValueError(
            "point must be an (x, y) tuple or an arcpy.Point, "
            f"got {type(point).__name__}"
        )

    if spatial_ref is None:
        spatial_ref = arcpy.SpatialReference(4326)  # WGS84 by default

    # compute vertices, evenly spread over a full turn
    angle_step = 2 * math.pi / num_sides
    vertices = [
        arcpy.Point(
            center.X + radius * math.cos(i * angle_step),
            center.Y + radius * math.sin(i * angle_step),
        )
        for i in range(num_sides)
    ]
    # close circle: the ring must end on its first vertex
    vertices.append(vertices[0])

    # build geom
    return arcpy.Polygon(arcpy.Array(vertices), spatial_ref)
def anonymize_fc_geom(fc):
    """
    anonymize geometry in an fc
    => every geometry is replaced by a circle of random radius, laid out on a
       square grid whose origin is a point in the middle of the Atlantic

    fc: path of the feature class to update in place

    The circles are built in EPSG:2154 with distances in meters.
    NOTE(review): every row receives a polygon, so fc is presumably a polygon
    feature class stored in a meter-based spatial reference - confirm.
    """
    # number of features
    count = int(arcpy.GetCount_management(fc).getOutput(0))
    # grid width to re-arrange features (smallest square holding them all)
    width = math.ceil(math.sqrt(count))
    # grid spacing (in m)
    spacing = 100
    # item padding inside a grid cell
    padding = 5
    # nb of vertices for resulting geom
    geom_precision = 64
    # radius bounds are the same for every cell, compute them once
    min_radius = spacing / 4
    max_radius = (spacing - (2 * padding)) / 2

    # identify new origin: a WGS84 point projected to the working SR
    sr = arcpy.SpatialReference(2154)
    origin_wgs84 = arcpy.PointGeometry(
        arcpy.Point(-40.0, 30.0),  # middle of atlantic
        arcpy.SpatialReference(4326),
    )
    new_origin = origin_wgs84.projectAs(sr).firstPoint

    with arcpy.da.UpdateCursor(fc, ['SHAPE@']) as cursor:
        print(f"Anonymizing: '{fc}'")
        for index, row in enumerate(cursor):
            print(f'handling item: {index+1} / {count}')
            # row-major position of this feature in the grid
            grid_y, grid_x = divmod(index, width)
            center = (
                new_origin.X + (grid_x * spacing),
                new_origin.Y + (grid_y * spacing),
            )
            radius = random.uniform(min_radius, max_radius)
            # reuse the single SpatialReference instead of rebuilding it per row
            row[0] = create_circle_from_point(center, radius, geom_precision, sr)
            cursor.updateRow(row)
def anonymize_fc_attribute(fc, field, string_mapper=None, range_min=None, range_max=None):
    """
    anonymize one field in one fc

    fc: path of the feature class to update in place
    field: name of the field to anonymize, must be of type 'String',
           'Double' or 'Integer'
    string_mapper: optional dict mapping explicit string values to their
                   replacement; values missing from it are reported and left
                   untouched. Without a mapper, strings are replaced by their
                   md5 hex digest (light anonymization) and nulls stay null
    range_min, range_max: bounds used to randomize numeric fields; they are
                          only used when BOTH are given, else the range
                          falls back to 0..200

    Nothing is written when the field is missing or has an unsupported type:
    an error is printed and the function returns.
    """
    fields = arcpy.ListFields(fc, field)
    if not fields:
        print(f"error '{field}' not found in '{fc}'")
        return
    field_type = fields[0].type
    if field_type not in ('String', 'Double', 'Integer'):
        print(f"Can't set attribute wrong type: '{field_type}' expected: 'String', 'Double', 'Integer'")
        # stop here: there is nothing to anonymize, and going on would
        # rewrite every row for no reason
        return

    # numeric range, resolved once for the whole run
    if range_min is not None and range_max is not None:
        low, high = range_min, range_max
    else:
        low, high = 0, 200

    # number of features
    count = int(arcpy.GetCount_management(fc).getOutput(0))
    with arcpy.da.UpdateCursor(fc, [field]) as cursor:
        print(f"Anonymizing: '{field}' of '{fc}'")
        for index, row in enumerate(cursor):
            print(f'handling item: {index+1} / {count}')
            value = row[0]
            if field_type == 'String':
                if string_mapper is not None:
                    if value in string_mapper:
                        row[0] = string_mapper[value]
                    else:
                        print(f"'{value}' missing in mapper for field '{field}' in '{fc}'")
                elif isinstance(value, str):
                    row[0] = hashlib.md5(value.encode('utf-8')).hexdigest()
                # null remains null
            elif field_type == 'Double':
                # uniform draws a real float and accepts non-integer bounds,
                # whereas randrange only yields integers
                row[0] = random.uniform(low, high)
            else:
                # 'Integer': both bounds are inclusive
                row[0] = random.randint(low, high)
            cursor.updateRow(row)
if __name__ == '__main__':
    # feature class to anonymize in place
    fc = 'C:\\path\\to\\my.gdb\\my_fc'

    # geometry first, then one attribute job per field
    anonymize_fc_geom(fc)

    attribute_jobs = (
        # string attribute anonymized while preserving its categories
        ("type", {"string_mapper": {'type 1': 'A', 'type 2': 'B'}}),
        # string attribute replaced by its md5 (keeps uniqueness while
        # hiding sensitive text, visual only)
        ("a-string-field", {}),
        # numeric attribute randomized within a fixed range
        ("a-numeric-field", {"range_min": 0, "range_max": 2048}),
    )
    for field_name, options in attribute_jobs:
        anonymize_fc_attribute(fc, field_name, **options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment