Created
March 5, 2014 02:31
-
-
Save neurosnap/9360098 to your computer and use it in GitHub Desktop.
Anonymize a DICOM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def anonymize(filename, output_filename, new_person_name="anonymous",
              new_patient_id="id", remove_curves=True, remove_private_tags=True):
    """Write a sanitized, partly anonymized copy of a DICOM file.

    The file at ``filename`` is read, its string values are passed through
    ``bleach.clean``, every person name (VR "PN") and the patient ID are
    replaced, selected patient elements are removed or blanked, and the
    result is saved to ``output_filename``.

    NOTE: only the elements handled below are touched; this is not a claim
    of complete de-identification.

    Args:
        filename: Path of the DICOM file to read.
        output_filename: Path the modified dataset is saved to.
        new_person_name: Replacement value for every element with VR "PN".
        new_patient_id: Replacement value for ``PatientID``.
        remove_curves: If true, delete elements whose tag group matches
            0x50xx.
        remove_private_tags: If true, call ``remove_private_tags()`` on the
            dataset.

    Returns:
        None. The result is written to ``output_filename``.
    """
    import logging

    logger = logging.getLogger(__name__)

    # ``basestring`` exists only on Python 2. Referencing it directly inside
    # the callback raised NameError on Python 3, which the old bare ``except``
    # swallowed, silently disabling sanitization altogether.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    def sanitize_html(dataset):
        """Removing all potential XSS attacks embedded within the dicom file"""
        dataset.walk(sanitize_callback)
        return dataset

    def sanitize_callback(dataset, data_element):
        """Called from the dataset "walk" recursive function for all data elements."""
        # "OW" and "SQ" elements are never cleaned.
        if data_element.VR in ("OW", "SQ"):
            return
        # Only plain string values are passed to bleach; everything else is
        # left untouched.
        if not isinstance(data_element.value, string_types):
            return
        try:
            data_element.value = bleach.clean(data_element.value)
        except Exception:
            # Best effort: one value that cannot be cleaned must not abort
            # the whole run, but the failure should be visible.
            logger.warning("Could not sanitize data element %s; value left unchanged",
                           data_element.tag, exc_info=True)

    # Define call-back functions for the dataset.walk() function
    def PN_callback(ds, data_element):
        """Called from the dataset "walk" recursive function for all data elements."""
        if data_element.VR == "PN":
            data_element.value = new_person_name

    def curves_callback(ds, data_element):
        """Called from the dataset "walk" recursive function for all data elements."""
        # Mask off the low byte of the group so every 0x50xx group matches.
        if data_element.tag.group & 0xFF00 == 0x5000:
            del ds[data_element.tag]

    # Load the current dicom file to 'anonymize'
    dataset = dicom.read_file(filename)

    # remove all html tags to prevent XSS
    dataset = sanitize_html(dataset)

    # Remove patient name and any other person names
    dataset.walk(PN_callback)

    # Change ID
    dataset.PatientID = new_patient_id

    # Remove data elements (should only do so if DICOM type 3 optional)
    # Use general loop so easy to add more later
    # Could also have done: del ds.OtherPatientIDs, etc.
    for name in ['OtherPatientIDs', 'OtherPatientIDsSequence']:
        if name in dataset:
            delattr(dataset, name)

    # Same as above but for blanking data elements that are type 2.
    for name in ['PatientBirthDate']:
        if name in dataset:
            dataset.data_element(name).value = ''

    # Remove private tags if function argument says to do so. Same for curves
    if remove_private_tags:
        dataset.remove_private_tags()
    if remove_curves:
        dataset.walk(curves_callback)

    # write the 'anonymized' DICOM out under the new filename
    dataset.save_as(output_filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment