Created
October 26, 2013 12:23
-
-
Save tomkralidis/7168870 to your computer and use it in GitHub Desktop.
using Python lxml to do XML Schema validation on a document, against multiple XML Schema (courtesy @rouault)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
#------------------------------------------------------------------------------- | |
# Portions coming from EOxServer | |
# ( https://github.com/EOxServer/eoxserver/blob/master/eoxserver/services/testbase.py ) | |
# Copyright (C) 2011 EOX IT Services GmbH | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included in all | |
# copies of this Software or works derived from this Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
# THE SOFTWARE. | |
#------------------------------------------------------------------------------- | |
from lxml import etree | |
import os | |
def ingest_file_and_strip_mime(filename):
    """Return the content of *filename* with MIME header residue removed.

    Lines that consist only of a line terminator (a bare LF or CRLF) and
    lines that contain ``Content-Type`` are dropped. Every other line is
    kept verbatim, including its line ending.

    Args:
        filename: path of the file to read.

    Returns:
        The remaining lines concatenated into a single byte string (the
        file is read in binary mode, so nothing is decoded).
    """
    # The file is opened in binary mode, so the markers must be byte strings
    # as well: comparing bytes with text never matches on Python 3, and
    # bytes.find() with a text argument raises TypeError.
    with open(filename, 'rb') as f:
        return b''.join(
            line for line in f
            if line not in (b'\r\n', b'\n') and b'Content-Type' not in line
        )
def substitute_opengis_schema_location(location, alternate_opengisschemas_location):
    """Redirect a schemas.opengis.net URL to an alternate base location.

    Args:
        location: schema location to rewrite. May be None (for instance
            when an ``import`` element carries no ``schemaLocation``), in
            which case None is returned.
        alternate_opengisschemas_location: base that replaces the
            ``http://schemas.opengis.net/`` prefix, or None to disable the
            substitution.

    Returns:
        ``alternate_opengisschemas_location + '/' + <rest of the URL>`` when
        *location* starts with ``http://schemas.opengis.net/`` and an
        alternate base is given; otherwise *location* unchanged.
    """
    opengis_prefix = 'http://schemas.opengis.net/'
    # Nothing to rewrite without a location or without an alternate base.
    if location is None or alternate_opengisschemas_location is None:
        return location
    if location.startswith(opengis_prefix):
        return alternate_opengisschemas_location + '/' + location[len(opengis_prefix):]
    return location
def _add_schema_import(schema_def, namespace, location):
    """Append an ``import`` child to *schema_def*, omitting absent attributes."""
    attrib = {}
    if namespace is not None:
        attrib["namespace"] = namespace
    if location is not None:
        attrib["schemaLocation"] = location
    etree.SubElement(schema_def, "import", attrib=attrib)


def validate(xml_filename_or_content, xsd_filename=None,
             application_schema_ns='http://mapserver.gis.umn.edu/mapserver',
             alternate_opengisschemas_location='SCHEMAS_OPENGIS_NET'):
    """Validate an XML document against the schemas it declares.

    If the document is itself an XML Schema, it is only compiled (after
    redirecting its ``import`` locations) and True is returned. Otherwise a
    wrapper schema is built that imports every namespace/location pair found
    in the root element's ``xsi:schemaLocation`` attribute, and the document
    is validated against it.

    Args:
        xml_filename_or_content: either XML content starting with ``<?xml``
            or the path of a file to read (MIME header lines are stripped).
        xsd_filename: path of the application schema. When None, it is
            derived from the XML file name by replacing its last three
            characters with ``xsd``.
        application_schema_ns: namespace whose schema is taken from
            *xsd_filename* rather than from the declared location.
        alternate_opengisschemas_location: replacement base for locations
            under ``http://schemas.opengis.net/`` (presumably a local mirror
            of the OGC schemas - confirm against callers); None disables
            the substitution.

    Returns:
        True if the document is valid (or, for a schema, if it compiles);
        False if validation fails, in which case the error is printed.

    Raises:
        ValueError: if the application schema is needed, *xsd_filename* is
            None and inline content was passed, so no file name can be
            derived.
        Exceptions raised by lxml while parsing the document or compiling a
        schema are not caught here.
    """
    import tempfile

    xsd_ns = 'http://www.w3.org/2001/XMLSchema'
    import_tag = '{%s}import' % xsd_ns

    # Match the marker type to the argument type so that byte strings work
    # on Python 3 as well as text.
    xml_marker = b'<?xml' if isinstance(xml_filename_or_content, bytes) else '<?xml'
    is_inline_content = xml_filename_or_content.startswith(xml_marker)

    if is_inline_content:
        doc = etree.XML(xml_filename_or_content)
    else:
        doc = etree.XML(ingest_file_and_strip_mime(xml_filename_or_content))

    # Special case if this is a schema: just check that it compiles.
    if doc.tag == '{%s}schema' % xsd_ns:
        for child in doc:
            if child.tag != import_tag:
                continue
            location = child.get('schemaLocation')
            if location is None:
                # schemaLocation is optional on import; nothing to redirect.
                continue
            child.set('schemaLocation',
                      substitute_opengis_schema_location(
                          location, alternate_opengisschemas_location))
        etree.XMLSchema(etree.XML(etree.tostring(doc)))
        return True

    schema_locations = doc.get("{http://www.w3.org/2001/XMLSchema-instance}schemaLocation")

    # Our stripped GetFeature documents have an empty timeStamp; put a
    # fake value in instead.
    if doc.get('timeStamp') == '':
        doc.set('timeStamp', '1970-01-01T00:00:00Z')

    # A document without xsi:schemaLocation yields an empty wrapper schema,
    # against which validation fails and is reported like any other error.
    locations = schema_locations.split() if schema_locations is not None else []
    # schemaLocation is a flat list of "namespace location" pairs.
    ns_location_pairs = list(zip(locations[::2], locations[1::2]))

    # Wrapper schema that imports every schema the document refers to.
    schema_def = etree.Element("schema", attrib={
        "elementFormDefault": "qualified",
        "version": "1.0.0",
    }, nsmap={
        None: xsd_ns
    })

    tempfiles = []
    try:
        # Special case for the main application schema.
        for ns, location in ns_location_pairs:
            if ns != application_schema_ns:
                continue

            if xsd_filename is not None:
                location = xsd_filename
            elif is_inline_content:
                raise ValueError(
                    'xsd_filename is required when XML content is passed '
                    'instead of a file name')
            else:
                location = xml_filename_or_content[0:-3] + 'xsd'

            # Remove mime-type header line if found to generate a valid .xsd
            sanitized_content = ingest_file_and_strip_mime(location)
            # Unique, private temporary file instead of a predictable /tmp
            # path; registered before writing so it is always cleaned up.
            fd, location = tempfile.mkstemp(suffix='.xsd')
            tempfiles.append(location)
            with os.fdopen(fd, 'wb') as f:
                f.write(sanitized_content)

            # Hoist the application schema's own imports into the wrapper,
            # redirecting OGC locations on the way.
            xsd = etree.XML(sanitized_content)
            for child in xsd:
                if child.tag != import_tag:
                    continue
                sub_location = child.get('schemaLocation')
                if sub_location is not None:
                    sub_location = substitute_opengis_schema_location(
                        sub_location, alternate_opengisschemas_location)
                _add_schema_import(schema_def, child.get('namespace'), sub_location)

            _add_schema_import(schema_def, ns, location)

        # Add each remaining schemaLocation as an import.
        for ns, location in ns_location_pairs:
            if ns == application_schema_ns:
                continue
            location = substitute_opengis_schema_location(
                location, alternate_opengisschemas_location)
            _add_schema_import(schema_def, ns, location)

        # TODO: ugly workaround. But otherwise, the doc is not recognized as schema
        schema = etree.XMLSchema(etree.XML(etree.tostring(schema_def)))

        try:
            schema.assertValid(doc)
            return True
        except etree.Error as e:
            print(str(e))
            return False
    finally:
        # Clean up even when schema compilation or parsing raised.
        for filename in tempfiles:
            os.remove(filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment