Created
November 22, 2013 15:17
-
-
Save dgroft/7601523 to your computer and use it in GitHub Desktop.
Applies an XSL transform to the XML returned from an HTTP request, then validates the result against an XSD schema.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

import argparse
import contextlib
import urllib

import lxml.etree as ET

try:  # Python 3 location of urlopen
    from urllib.request import urlopen
except ImportError:  # Python 2 fallback
    from urllib import urlopen
# Command-line interface: which file lists the feed URLs, which file receives
# the error reports, and whether the fetched/transformed XML is echoed.
parser = argparse.ArgumentParser(description="Transforms and Validates XML")
parser.add_argument(
    "--feeds",
    help="the text file that holds the urls to be validated",
    default="feeds.txt")
parser.add_argument(
    "--log",
    help="the output log file that reports all errors",
    default="log.txt")
parser.add_argument(
    "-v", "--verbose",
    help="increase output verbosity",
    action="store_true")
args = parser.parse_args()
def getDOM(feedUrl):
    """Fetch the XML served at ``feedUrl`` and parse it into an lxml tree.

    The HTTP response is closed as soon as parsing finishes or fails, so
    connections are not left open while the caller loops over many feeds.

    Any error raised by ``urlopen`` or ``ET.parse`` propagates to the caller.
    """
    # contextlib.closing is used because the Python 2 response object is not
    # a context manager on its own; it only guarantees a close() method.
    with contextlib.closing(urlopen(feedUrl)) as response:
        return ET.parse(response)
def getXslt(feedUrl):
    """Return the path of the XSLT stylesheet to use for ``feedUrl``.

    The stylesheet is selected by looking for a known REST path fragment
    anywhere inside the URL; fragments are checked in order and the first
    match wins.

    Returns ``None`` when the URL contains none of the known fragments.
    """
    stylesheets = (
        ("myspecialfeeds/rest/abc", "xslts/Abc.xslt"),
        ("myspecialfeeds/rest/def", "xslts/Def.xslt"),
        ("myspecialfeeds/rest/ghi", "xslts/Ghi.xslt"),
    )
    for fragment, path in stylesheets:
        if fragment in feedUrl:
            return path
    # No stylesheet is mapped to this feed.
    return None
def applyXslt(dom, xsltName):
    """Apply the XSLT stylesheet stored at ``xsltName`` to ``dom``.

    The stylesheet file is parsed and compiled with ``ET.XSLT`` on every
    call, then invoked on ``dom``; the result of that invocation is
    returned. Errors from parsing or transforming propagate to the caller.
    """
    stylesheet = ET.parse(xsltName)
    transform = ET.XSLT(stylesheet)
    return transform(dom)
def getXsd(feedUrl):
    """Return the path of the XSD schema to validate ``feedUrl`` against.

    The schema is selected by looking for a known REST path fragment
    anywhere inside the URL; fragments are checked in order and the first
    match wins.

    Returns ``None`` when the URL contains none of the known fragments.
    """
    schemas = (
        ("myspecialfeeds/rest/abc", "xsds/Abc.xsd"),
        ("myspecialfeeds/rest/def", "xsds/Def.xsd"),
        ("myspecialfeeds/rest/ghi", "xsds/Ghi.xsd"),
    )
    for fragment, path in schemas:
        if fragment in feedUrl:
            return path
    # No schema is mapped to this feed.
    return None
def applyXsd(transformedXml, xsdName):
    """Validate ``transformedXml`` against the XSD schema file ``xsdName``.

    The schema file is parsed and compiled with ``ET.XMLSchema`` on every
    call. Returns ``None``; a document that does not conform is reported
    through the exception raised by ``assertValid``, which propagates to
    the caller together with any schema parsing error.
    """
    schema_doc = ET.parse(xsdName)
    xmlschema = ET.XMLSchema(schema_doc)
    xmlschema.assertValid(transformedXml)
def _pretty(tree):
    """Serialize ``tree`` as pretty-printed XML text."""
    # ET.tostring returns a byte string; decoding keeps Python 3 from
    # printing it as a b'...' literal with escaped newlines.
    return ET.tostring(tree, pretty_print=True).decode("utf-8")


# Validate every feed listed in the feeds file. Failures are appended to the
# log file, so reports from earlier runs are preserved.
with open(args.log, "a") as logFile:
    with open(args.feeds, "r") as feedsFile:
        print()
        for feedLine in feedsFile:
            # strip the line terminator (including the "\r" of CRLF files)
            # and any surrounding whitespace
            feedUrl = feedLine.strip()
            # skip empty lines and urls commented out using '#'
            if not feedUrl or feedUrl.startswith("#"):
                continue
            print("Validating: " + feedUrl)
            # reset per-feed state so the error handler never reports
            # documents left over from a previous feed (or undefined names
            # when the very first fetch fails)
            dom = None
            transformed_xml = None
            try:
                # fetch xml from feed via url
                dom = getDOM(feedUrl)
                # only print the xml if verbose logging is enabled
                if args.verbose:
                    print(_pretty(dom))
                # determine which xsl transform to apply
                xslt = getXslt(feedUrl)
                if xslt is None:
                    raise ValueError("No XSLT is mapped to this feed URL.")
                # apply the xsl transform
                transformed_xml = applyXslt(dom, xslt)
                # only print the transformed xml if verbose logging is enabled
                if args.verbose:
                    print(_pretty(transformed_xml))
                # determine which xsd to validate transformed xml against
                xsd = getXsd(feedUrl)
                if xsd is None:
                    raise ValueError("No XSD is mapped to this feed URL.")
                # validate the xml against the xsd
                applyXsd(transformed_xml, xsd)
                print("Feed is valid.")
            except Exception as e:
                # Deliberately broad: any failure (network, parsing,
                # transform, validation) marks the feed invalid and is
                # logged, and the loop moves on to the next feed.
                print("Feed is INVALID! (See log for details.)")
                print("Invalid feed URL: " + feedUrl, file=logFile)
                print(e, file=logFile)
                if args.verbose:
                    # only log the documents that were actually produced
                    # before the failure
                    if dom is not None:
                        print("Feed URL returned the following XML:", file=logFile)
                        print(_pretty(dom), file=logFile)
                    if transformed_xml is not None:
                        print("Feed URL XML transformed to:", file=logFile)
                        print(_pretty(transformed_xml), file=logFile)
                # blank line separates entries in the log
                print("", file=logFile)
            # blank line separates feeds on the console
            print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment