Created
April 18, 2010 17:42
-
-
Save generalov/370425 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Public API of this module. The exported name must match the function that is
# actually defined below, otherwise ``from <module> import *`` fails with an
# AttributeError.
__all__ = ('xml_resolve_entities', )
try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        # Neither legacy module is available: fall back to the ``io`` one.
        from io import StringIO
from xml.sax import handler
from xml.sax import make_parser
from xml.sax import saxutils
def xml_resolve_entities(xmlstr, resolve_dtd=None, output_encoding='utf-8'):
    """Re-serialize an XML document with its entity references expanded.

    The document is parsed with SAX and written back out through
    ``saxutils.XMLGenerator``, so entity references in the input appear as
    their replacement values in the result.

    :param xmlstr: The input XML, either as a string or as a file-like
        object.
    :param resolve_dtd: Optional callable that receives the system
        identifier of an external entity (for example the DTD named in the
        DOCTYPE) and returns its content, either as a string or as a
        file-like object to read from. When omitted, the parser's default
        entity resolver is left in place.
    :param output_encoding: Encoding handed to ``XMLGenerator``; it is also
        the one named in the generated XML declaration.
    :returns: The regenerated XML document.
    :rtype: string

    For example::

        >>> xml_source = '''<?xml version="1.0"?>
        ... <!DOCTYPE math SYSTEM "sample.dtd">
        ... <math>&times;</math>'''
        >>> dtd_catalogue = {'sample.dtd': '<!ENTITY times "&#215;">', }
        >>> print(xml_resolve_entities(xml_source, dtd_catalogue.get))
        <?xml version="1.0" encoding="utf-8"?>
        <math>×</math>
    """
    source = enshure_is_filelike(xmlstr)
    out = StringIO()
    try:
        parser = make_parser()
        if resolve_dtd:
            # Python 3.7.1 and later leave external general entities switched
            # off by default, in which case the resolver below would never be
            # consulted and the references would be dropped from the output.
            # Turn the feature on explicitly, but only when the caller
            # supplied the resolver that decides what gets read.
            parser.setFeature(handler.feature_external_ges, True)
            parser.setEntityResolver(EntityResolver(resolve_dtd))
        parser.setContentHandler(saxutils.XMLGenerator(out, output_encoding))
        parser.parse(source)
        # Grab the text before the ``finally`` clause closes the buffer.
        return out.getvalue()
    finally:
        out.close()
def enshure_is_filelike(filelike):
    """Return *filelike* as an object that can be read from.

    Strings are wrapped in an in-memory buffer holding their content; any
    other object is returned unchanged.

    :param filelike: A string or an (assumed) file-like object.
    :returns: An in-memory buffer for string input, otherwise *filelike*
        itself.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode.
    except NameError:
        string_types = str  # Python 3 has no ``basestring``.
    if isinstance(filelike, string_types):
        return StringIO(filelike)
    if isinstance(filelike, bytes):
        # Only reachable on Python 3: on Python 2 ``bytes`` is ``str`` and was
        # handled above. Byte strings need a binary buffer there.
        from io import BytesIO
        return BytesIO(filelike)
    # FIXME: how can we make sure that filelike really is file-like?
    # EAFP: take it on trust; if it is not, the parser will complain.
    return filelike
class EntityResolver(handler.EntityResolver):
    """SAX entity resolver that delegates the lookup to a callable."""

    def __init__(self, resolve):
        """Store *resolve*, the callable invoked with a system identifier."""
        self._resolve = resolve

    def resolveEntity(self, unused_public_id, system_id):
        """Resolve *system_id* through the stored callable.

        The public identifier is ignored. Whatever the callable returns is
        passed through ``enshure_is_filelike`` before being handed back to
        the parser.
        """
        return enshure_is_filelike(self._resolve(system_id))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment