Skip to content

Instantly share code, notes, and snippets.

@corbinbs
Created July 18, 2011 13:05
Show Gist options
  • Save corbinbs/1089445 to your computer and use it in GitHub Desktop.
Save corbinbs/1089445 to your computer and use it in GitHub Desktop.
Simple XML file scrubber in Python
from xml.sax.saxutils import XMLGenerator
import xml
def mask_field(field):
    """Return a string of ``X`` characters with the same length as *field*."""
    mask_length = len(field)
    return mask_length * 'X'
class ScrubHandler(XMLGenerator, object):
    """SAX content handler that re-emits a document with selected text scrubbed.

    Every event is forwarded to ``XMLGenerator``, which writes the XML to
    *out*.  Character data is held back in ``self.buffer`` until the next
    element boundary (a SAX parser may report one run of text through several
    ``characters`` calls).  When the element that directly contains the text
    is a key of ``scrub_map``, the mapped callable is applied to the text and
    its return value is written instead.
    """

    def __init__(self, scrub_map=None, out=None):
        """Set up the handler.

        Args:
            scrub_map: Mapping of element name -> callable that receives the
                element's text and returns the replacement text.  ``None``
                (the default) means no element is scrubbed.
            out: Output stream, passed unchanged to ``XMLGenerator``.
        """
        super(ScrubHandler, self).__init__(out=out)
        # Build the empty mapping per instance; a ``{}`` default in the
        # signature would be a single dict shared by every handler.
        self.scrub_map = {} if scrub_map is None else scrub_map
        self.buffer = ""
        # Names of the elements currently open, innermost last.  Needed to
        # know which element owns text that is flushed when a child starts.
        self._open_elements = []

    def _emit_buffer(self, owner):
        """Write the buffered text, scrubbed when *owner* is in ``scrub_map``.

        Args:
            owner: Name of the element that directly contains the buffered
                text, or ``None`` when no element is open.
        """
        text, self.buffer = self.buffer, ""
        if owner in self.scrub_map:
            text = self.scrub_map[owner](text)
        super(ScrubHandler, self).characters(text)

    def startElement(self, name, attr):
        """Flush the parent's pending text, then open element *name*."""
        # Text collected so far precedes this child and belongs to the parent.
        # Without this flush it would be written inside the child instead
        # (and scrubbed, or not, according to the child's name).
        if self.buffer:
            owner = self._open_elements[-1] if self._open_elements else None
            self._emit_buffer(owner)
        self._open_elements.append(name)
        super(ScrubHandler, self).startElement(name, attr)

    def characters(self, data):
        """Accumulate *data*; it is written at the next element boundary."""
        self.buffer += data

    def endElement(self, name):
        """Write element *name*'s pending (possibly scrubbed) text and close it.

        A mapped element is always passed through its scrubber here, even
        when it has no text, in which case the scrubber receives ``""``.
        """
        self._emit_buffer(name)
        # Guard the pop so a bare endElement() call on a fresh handler
        # (no matching startElement) still behaves as before.
        if self._open_elements:
            self._open_elements.pop()
        super(ScrubHandler, self).endElement(name)
def scrub_xml_file(input_filename, output_filename, scrub_map=None):
    """Parse *input_filename* and write a scrubbed copy to *output_filename*.

    Args:
        input_filename: Source document, handed to the SAX parser's ``parse``.
        output_filename: Path of the file to create (or overwrite).
        scrub_map: Mapping of element name -> callable, forwarded to
            ``ScrubHandler``.  ``None`` (the default) scrubs nothing.

    Any exception raised while opening the files or parsing propagates to
    the caller; the output file is closed either way.
    """
    # Normalise here rather than passing None on, and avoid a ``{}`` default
    # in the signature, which would be one dict shared across calls.
    if scrub_map is None:
        scrub_map = {}
    # Binary mode: XMLGenerator then encodes the output itself, so the bytes
    # on disk match the encoding it announces in the XML declaration instead
    # of depending on the platform's default text encoding.
    with open(output_filename, 'wb') as output_file:
        handler = ScrubHandler(out=output_file, scrub_map=scrub_map)
        parser = xml.sax.make_parser()
        parser.setContentHandler(handler)
        parser.parse(input_filename)
if __name__ == "__main__":
    # Demo: mask the SSN and LastName text of a small in-memory document.
    # No output stream is given, so the handler uses XMLGenerator's default.
    sample_xml = "<Doc><SSN>123456789</SSN><LastName>CORBIN</LastName><FirstName>Brian</FirstName></Doc>"
    scrub_elements = dict.fromkeys(('SSN', 'LastName'), mask_field)
    xml.sax.parseString(sample_xml, ScrubHandler(scrub_map=scrub_elements))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment