Skip to content

Instantly share code, notes, and snippets.

@neurosnap
Created March 5, 2014 02:31
Show Gist options
  • Save neurosnap/9360098 to your computer and use it in GitHub Desktop.
Save neurosnap/9360098 to your computer and use it in GitHub Desktop.
Anonymize a DICOM
def anonymize(filename, output_filename, new_person_name="anonymous",
              new_patient_id="id", remove_curves=True, remove_private_tags=True):
    """Write a sanitized, de-identified copy of a DICOM file.

    The file at ``filename`` is read, modified in memory and saved to
    ``output_filename``. Steps are applied in this order:

    1. String values are passed through ``bleach.clean`` (elements whose VR
       is ``OW`` or ``SQ`` are skipped).
    2. Every element with VR ``PN`` is set to ``new_person_name``.
    3. ``PatientID`` is set to ``new_patient_id``.
    4. ``OtherPatientIDs`` / ``OtherPatientIDsSequence`` are deleted and
       ``PatientBirthDate`` is blanked, when present.
    5. Private tags are removed if ``remove_private_tags`` is true.
    6. Elements in groups ``0x50xx`` are removed if ``remove_curves`` is true.

    NOTE(review): relies on ``dicom`` and ``bleach`` being available at module
    scope; their imports are not visible in this section of the file.

    Args:
        filename: Path of the DICOM file to read.
        output_filename: Path the modified dataset is saved to.
        new_person_name: Replacement value for all ``PN`` elements.
        new_patient_id: Replacement value for ``PatientID``.
        remove_curves: When true, delete elements in groups ``0x50xx``.
        remove_private_tags: When true, call ``remove_private_tags()`` on
            the dataset.

    Returns:
        None. The result is written to ``output_filename``.
    """
    # ``basestring`` only exists on Python 2. Resolve the text type once so
    # the sanitizer also works on Python 3 instead of raising a NameError
    # that would be silently swallowed for every element.
    try:
        text_types = basestring  # Python 2
    except NameError:
        text_types = str  # Python 3

    # VRs whose values are never handed to the HTML sanitizer.
    unsanitized_vrs = ("OW", "SQ")

    def sanitize_callback(ds, data_element):
        """Walk callback: run ``bleach.clean`` over string-valued elements."""
        if data_element.VR in unsanitized_vrs:
            return
        try:
            value = data_element.value
            if isinstance(value, text_types):
                data_element.value = bleach.clean(value)
        except Exception:
            # Deliberately best-effort: an element that cannot be read or
            # cleaned is left untouched rather than aborting the whole run.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
            # and SystemExit propagate.
            pass

    def sanitize_html(ds):
        """Remove potential XSS payloads embedded in the dataset's values."""
        ds.walk(sanitize_callback)
        return ds

    def PN_callback(ds, data_element):
        """Walk callback: overwrite every person-name (``PN``) element."""
        if data_element.VR == "PN":
            data_element.value = new_person_name

    def curves_callback(ds, data_element):
        """Walk callback: delete elements whose group is ``0x50xx``."""
        if (data_element.tag.group & 0xFF00) == 0x5000:
            del ds[data_element.tag]

    # Load the current DICOM file to 'anonymize'.
    dataset = dicom.read_file(filename)

    # Strip HTML first so later replacements are not re-escaped.
    dataset = sanitize_html(dataset)

    # Replace the patient name and any other person names.
    dataset.walk(PN_callback)

    # Change the patient ID.
    dataset.PatientID = new_patient_id

    # Remove data elements (should only be done for DICOM type 3, optional).
    # Kept as a loop so more names are easy to add later.
    for name in ("OtherPatientIDs", "OtherPatientIDsSequence"):
        if name in dataset:
            delattr(dataset, name)

    # Same idea, but blank (rather than delete) elements that are type 2.
    for name in ("PatientBirthDate",):
        if name in dataset:
            dataset.data_element(name).value = ''

    # Remove private tags and curves only when the caller asked for it.
    if remove_private_tags:
        dataset.remove_private_tags()
    if remove_curves:
        dataset.walk(curves_callback)

    # Write the 'anonymized' DICOM out under the new filename.
    dataset.save_as(output_filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment