Last active
October 2, 2025 08:12
-
-
Save MarcAlx/4cec595ec20c945a6d9191cf5bea5ce6 to your computer and use it in GitHub Desktop.
ArcPy Feature Class anonymizer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to anonymize the attributes and geometry of a feature class.
#
# /!\ String anonymization is light and based on MD5: use it only for visual hiding, not for security!
# /!\ Assumes spatial references are in meters, like 102110/2154.
#
# To run:
# & 'C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\python.exe' .\arcgis-anonymizer.py
import arcpy | |
import math | |
import random | |
import hashlib | |
def create_circle_from_point(point, radius, num_sides=64, spatial_ref=None):
    """Build a regular polygon approximating a circle around a center point.

    Args:
        point: Center of the circle, either an ``(x, y)`` tuple/list or an
            ``arcpy.Point``.
        radius: Circle radius, expressed in the same units as the center
            coordinates (it is added directly to X and Y).
        num_sides: Number of vertices used to approximate the circle.
            Must be at least 3.
        spatial_ref: ``arcpy.SpatialReference`` assigned to the resulting
            polygon. Defaults to WGS84 (EPSG:4326) when omitted.

    Returns:
        An ``arcpy.Polygon`` whose ring is explicitly closed (the first
        vertex is repeated at the end).

    Raises:
        ValueError: If ``num_sides`` is lower than 3, or if ``point`` is
            neither a tuple/list nor an ``arcpy.Point``.
    """
    # Validate first: 0 would divide by zero below and a negative value would
    # leave the vertex list empty, both failing with unrelated errors.
    if num_sides < 3:
        raise ValueError(f"num_sides must be at least 3, got {num_sides!r}")

    if isinstance(point, (tuple, list)):
        center = arcpy.Point(point[0], point[1])
    elif isinstance(point, arcpy.Point):
        center = point
    else:
        raise ValueError(
            "point must be an (x, y) tuple/list or an arcpy.Point, "
            f"got {type(point).__name__}"
        )

    if spatial_ref is None:
        spatial_ref = arcpy.SpatialReference(4326)  # WGS84 by default

    # Compute the vertices, evenly spread around the center.
    angle_step = 2 * math.pi / num_sides
    vertices = [
        arcpy.Point(
            center.X + radius * math.cos(i * angle_step),
            center.Y + radius * math.sin(i * angle_step),
        )
        for i in range(num_sides)
    ]
    # Close the ring by repeating the first vertex.
    vertices.append(vertices[0])

    # Build the geometry.
    return arcpy.Polygon(arcpy.Array(vertices), spatial_ref)
def anonymize_fc_geom(fc):
    """Anonymize every geometry of a feature class, in place.

    Each feature's shape is replaced by a circle of random radius. The
    circles are laid out row by row on a square grid whose origin is the
    point (-40.0, 30.0) in WGS84 (middle of the Atlantic) projected to
    EPSG:2154.

    Args:
        fc: Path of the feature class to update.
    """
    # Number of features.
    count = int(arcpy.GetCount_management(fc).getOutput(0))
    # Grid width (in cells) used to re-arrange the features as a square.
    width = math.ceil(math.sqrt(count))
    # Grid spacing (in m).
    spacing = 100
    # Padding kept between a circle and the edges of its grid cell.
    padding = 5
    # Number of vertices of each resulting geometry.
    geom_precision = 64
    # Radius bounds are the same for every feature, so compute them once.
    min_radius = spacing / 4
    max_radius = (spacing - (2 * padding)) / 2

    # Identify the new origin: middle of the Atlantic, projected to the
    # spatial reference the output circles are created in.
    sr_wgs84 = arcpy.SpatialReference(4326)
    sr = arcpy.SpatialReference(2154)
    origin_geom = arcpy.PointGeometry(arcpy.Point(-40.0, 30.0), sr_wgs84)
    new_origin = origin_geom.projectAs(sr).firstPoint

    with arcpy.da.UpdateCursor(fc, ['SHAPE@']) as cursor:
        print(f"Anonymizing: '{fc}'")
        for index, row in enumerate(cursor):
            print(f'handling item: {index + 1} / {count}')
            # Row-major position of this feature in the grid.
            grid_y, grid_x = divmod(index, width)
            center = (
                new_origin.X + (grid_x * spacing),
                new_origin.Y + (grid_y * spacing),
            )
            radius = random.uniform(min_radius, max_radius)
            # Reuse the single spatial reference instead of building a new
            # one for every row.
            row[0] = create_circle_from_point(center, radius, geom_precision, sr)
            cursor.updateRow(row)
def anonymize_fc_attribute(fc, field, string_mapper=None, range_min=None, range_max=None):
    """Anonymize one field of a feature class, in place.

    Only fields of type 'String', 'Double' or 'Integer' are handled; for any
    other type, or when the field is not found, an error is printed and the
    feature class is left untouched.

    Args:
        fc: Path of the feature class to update.
        field: Name of the field to anonymize.
        string_mapper: Optional mapping from original string values to their
            replacement. Values missing from the mapping are reported and
            left unchanged. When omitted, string values are replaced by
            their MD5 hex digest (light anonymization, not secure) and null
            values stay null.
        range_min: Lower bound for random numeric values. Only used when
            ``range_max`` is provided as well.
        range_max: Upper bound for random numeric values. Only used when
            ``range_min`` is provided as well.

    Numeric fields fall back to the range 0..200 unless both bounds are
    given. 'Integer' fields receive ``random.randint`` values and 'Double'
    fields receive ``random.uniform`` values.
    """
    fields = arcpy.ListFields(fc, field)
    if not fields:
        print(f"error '{field}' not found in '{fc}'")
        return

    field_type = fields[0].type
    if field_type not in ('String', 'Double', 'Integer'):
        print(f"Can't set attribute wrong type: '{field_type}' expected: 'String', 'Double', 'Integer'")
        # Stop here: without this return the cursor below would still run
        # over every row of an unsupported field.
        return

    # A custom range applies only when both bounds are supplied.
    if range_min is not None and range_max is not None:
        low, high = range_min, range_max
    else:
        low, high = 0, 200

    # Number of features (only needed for progress output).
    count = int(arcpy.GetCount_management(fc).getOutput(0))

    with arcpy.da.UpdateCursor(fc, [field]) as cursor:
        print(f"Anonymizing: '{field}' of '{fc}'")
        for index, row in enumerate(cursor):
            print(f'handling item: {index + 1} / {count}')
            value = row[0]
            if field_type == 'String':
                if string_mapper is not None:
                    if value in string_mapper:
                        row[0] = string_mapper[value]
                    else:
                        print(f"'{value}' missing in mapper for field '{field}' in '{fc}'")
                elif isinstance(value, str):
                    row[0] = hashlib.md5(value.encode('utf-8')).hexdigest()
                # null remains null
            elif field_type == 'Double':
                # uniform() accepts float bounds and yields real values;
                # randrange() rejects non-integer bounds.
                row[0] = random.uniform(low, high)
            else:  # 'Integer'
                row[0] = random.randint(low, high)
            cursor.updateRow(row)
if __name__ == '__main__':
    # Feature class that gets anonymized in place.
    target_fc = r'C:\path\to\my.gdb\my_fc'

    # Geometry first.
    anonymize_fc_geom(target_fc)

    # String attribute anonymized through an explicit mapping, which
    # preserves the categories.
    type_mapping = {'type 1': 'A', 'type 2': 'B'}
    anonymize_fc_attribute(target_fc, "type", string_mapper=type_mapping)

    # String attribute replaced by its MD5 digest (keeps values distinct
    # while hiding the sensitive text; visual only).
    anonymize_fc_attribute(target_fc, "a-string-field")

    # Numeric attribute randomized within a fixed range.
    anonymize_fc_attribute(
        target_fc,
        "a-numeric-field",
        range_min=0,
        range_max=2048,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment