Last active
May 6, 2024 19:54
-
-
Save birkin/7fcde347b65deb2df755b63923803745 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Add this to the server run_tests to ensure xmlcatalog is properly configured and is actually being called.
A MODS file would likely be the best candidate for the `xml_filepath`.
""" | |
import os | |
from lxml import etree | |
def validate_xml_with_schema( xml_filepath: str, xsd_filepath: str ) -> None:
    """
    Validates an XML file against an XSD schema, without network access.

    Intended as a server run_tests check. A passing run is meant to confirm that:
    - xmlcatalog is routing the schema location to the local file system
    - the C `libxml2` library used by lxml does auto-default to the standard server's `xml/catalog` file
    - so this should not be necessary: `os.environ['XML_CATALOG_FILES'] = '/path/to/xml/catalog'`

    Args:
        xml_filepath: path of the XML document to validate.
        xsd_filepath: path of the XSD schema to validate against.

    Returns:
        None. The outcome is reported via `print()`.

    Raises:
        Errors from loading or compiling the schema itself are not caught and propagate to the caller.
    """
    # Parse the schema by path: lxml opens and closes the file itself (no leaked handle),
    # and the schema document keeps its base URL so relative imports/includes can resolve.
    schema_obj = etree.XMLSchema( etree.parse(xsd_filepath) )
    parser_obj = etree.XMLParser( schema=schema_obj, no_network=True )
    try:
        etree.parse( xml_filepath, parser_obj )  # since the parser is built with the schema, it will auto-validate the XML, no need for `schema.assertValid( doc )`
    except ( etree.DocumentInvalid, etree.XMLSyntaxError ) as e:
        # A schema-aware parser reports validation failures as XMLSyntaxError;
        # DocumentInvalid is kept for backward compatibility with the original handler.
        print( f"XML validation error: {e}" )
    else:
        print( "XML is valid according to the schema." )
    return
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment