Skip to content

Instantly share code, notes, and snippets.

@generalov
Created April 18, 2010 17:42
Show Gist options
  • Save generalov/370425 to your computer and use it in GitHub Desktop.
Save generalov/370425 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# Public API of this module; the name must match the function defined below.
__all__ = ('xml_resolve_entities', )
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from xml.sax import handler
from xml.sax import make_parser
from xml.sax import saxutils
def xml_resolve_entities(xmlstr, resolve_dtd=None, output_encoding='utf-8'):
    """Re-serialize an XML document with its entity references expanded.

    The input is fed to a SAX parser whose events are written back out by
    ``saxutils.XMLGenerator``.

    :param xmlstr: The XML to process, either a string or a file-like object.
    :param resolve_dtd: Optional callable that receives the system identifier
        of an external entity. A string result is used as the entity's text
        (it is wrapped in ``StringIO``); a file-like result is read from
        directly. When omitted (or falsy) no custom resolver is installed on
        the parser.
    :param output_encoding: Encoding handed to ``XMLGenerator``.
    :returns: Everything the generator wrote to the output buffer.

    For example::

        >>> xml_source = '''<?xml version="1.0"?>
        ... <!DOCTYPE times SYSTEM "sample.dtd">
        ... <math>&times;</math>'''
        >>> dtd_catalogue = {'sample.dtd': '<!ENTITY times "&#215;">', }
        >>> print xml_resolve_entities(xml_source, dtd_catalogue.get)
        <?xml version="1.0" encoding="utf-8"?>
        <math>×</math>
    """
    source = enshure_is_filelike(xmlstr)
    buf = StringIO()
    try:
        reader = make_parser()
        if resolve_dtd:
            resolver = EntityResolver(resolve_dtd)
            reader.setEntityResolver(resolver)
        writer = saxutils.XMLGenerator(buf, output_encoding)
        reader.setContentHandler(writer)
        reader.parse(source)
        # Read the result before ``finally`` closes the buffer.
        return buf.getvalue()
    finally:
        buf.close()
def enshure_is_filelike(filelike):
    """Return an object to read from for *filelike*.

    Strings are wrapped in ``StringIO``; any other object is returned as-is.
    """
    if not isinstance(filelike, basestring):
        # FIXME: how can we make sure ``filelike`` really behaves like a file?
        # EAFP: take the caller's word for it. If it does not, the parser
        # will complain.
        return filelike
    return StringIO(filelike)
class EntityResolver(handler.EntityResolver):
    """SAX entity resolver that delegates lookups to a user-supplied callable.

    :param resolve: Callable taking a system identifier and returning the
        entity's content as a string, a file-like object to read it from, or
        ``None`` when it has nothing for that identifier.
    """

    def __init__(self, resolve):
        self._resolve = resolve

    def resolveEntity(self, unused_public_id, system_id):
        """Return the source the parser should read the entity from.

        :param unused_public_id: Public identifier of the entity (ignored).
        :param system_id: System identifier passed on to the callable.
        :returns: A file-like object for the callable's result, or
            ``system_id`` itself when the callable returned ``None``.
        """
        data = self._resolve(system_id)
        if data is None:
            # Lookups such as ``dict.get`` answer None for identifiers they do
            # not know. None is neither a system identifier nor an input
            # source, so the parser cannot use it; fall back to what the stock
            # ``handler.EntityResolver`` does and return the identifier.
            return system_id
        return enshure_is_filelike(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment