Created
May 16, 2024 12:08
-
-
Save Delors/189629b86265463e4a625924a9f705c8 to your computer and use it in GitHub Desktop.
Evaluate XPath Expressions with NodeJs and XPath
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
# Intro | |
Evaluates a set of XPath expressions against an XML file. | |
Usage: node xpaths_evaluator.js <spec.json> | |
The specification document is a JSON file that must conform to the JSON schema defined by `specSchema` below.
# Required Libraries: | |
- xpath | |
- @xmldom/xmldom | |
- jsonschema | |
# Example | |
Given the following XML document (demo.xml): | |
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?> | |
<lehrveranstaltungen | |
status="akkreditiert" | |
xmlns="http://dhbw-mannheim.de" | |
xmlns:xlink="http://www.w3.org/1999/xlink"> | |
<!-- Modul muss überarbeitet werden... --> | |
<modul> | |
<vorlesung id="191" | |
xlink:type="simple" | |
xlink:href="https://www.dhbw-mannheim.de/web_entwicklung"> | |
Web Entwicklung | |
</vorlesung> | |
<vorlesung id="2023">Verteilte Systeme</vorlesung> | |
</modul> | |
</lehrveranstaltungen> | |
and the following JSON document (demo.xpaths.json) with the xpaths: | |
{ | |
"source": "demo.xml", | |
"namespaces": { | |
"xlink": "http://www.w3.org/1999/xlink", | |
"dhbw": "http://dhbw-mannheim.de" | |
}, | |
"xpaths" : [ | |
{ | |
"expr": "//comment()" | |
},{ | |
"expr": "//dhbw:vorlesung", | |
"subExpr": ".." | |
},{ | |
"expr": "//@*[local-name(.)='href' and namespace-uri(.)='http://www.w3.org/1999/xlink']" | |
} | |
] | |
} | |
It is then possible to evaluate all xpaths against the document using this script: | |
node xpaths_evaluator.js demo.xpaths.json | |
@author Michael Eichberg | |
*/ | |
/**
 * JSON schema of the specification document passed on the command line.
 *
 * - `source`:     path of the XML file to query (required).
 * - `namespaces`: optional map from prefix to namespace URI; every value has
 *                 to be a string because it is handed to the XPath engine as
 *                 the namespace URI of the prefix.
 * - `xpaths`:     the expressions to evaluate (required, because the script
 *                 iterates over this list unconditionally). Each entry has a
 *                 mandatory `expr` and an optional `subExpr` that is evaluated
 *                 relative to every node selected by `expr`.
 */
const specSchema = {
    "title": "XPath Evaluator Specification",
    "description": "A specification for evaluating XPath expressions against an XML file",
    "type": "object",
    "properties": {
        "source": {
            "description": "The path to the XML file.",
            "type": "string",
        },
        "namespaces": {
            "description": "Namespaces used in the XPath expressions; keys are the prefixes, values are the URIs; i.e., the names.",
            "type": "object",
            // A non-string URI can never match a namespace; reject it early.
            "additionalProperties": { "type": "string" },
        },
        "xpaths": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "expr": { "type": "string" },
                    "subExpr": { "type": "string" }
                },
                "additionalProperties": false,
                "required": ["expr"]
            }
        }
    },
    // Without `xpaths` there is nothing to evaluate; report that as a
    // validation error instead of failing later with a TypeError.
    "required": ["source", "xpaths"]
};
const validate = require('jsonschema').validate; | |
const fs = require("fs"); | |
const xpath = require('xpath'); | |
const dom = require('@xmldom/xmldom').DOMParser; | |
// The path of the specification file is the only (mandatory) argument.
if (process.argv.length < 3) {
    console.error("Usage: node xpaths_evaluator.js <spec.json>");
    process.exit(1);
}

/**
 * Runs `load` and returns its result. If `load` throws, a one-line message
 * is written to stderr and the process terminates with exit code 1, so that
 * I/O and parse problems are reported without a stack trace.
 *
 * @param {string} description - what was attempted; used in the error message.
 * @param {function(): *} load - the operation to perform.
 * @returns {*} whatever `load` returns.
 */
function loadOrExit(description, load) {
    try {
        return load();
    } catch (error) {
        console.error(`Cannot ${description}: ${error.message}`);
        process.exit(1);
    }
}

const specPath = process.argv[2];
const spec = loadOrExit(
    `read the specification "${specPath}"`,
    () => JSON.parse(fs.readFileSync(specPath, "utf8")));

// Reject specifications that do not conform to `specSchema`.
const errors = validate(spec, specSchema).errors;
if (errors && errors.length > 0) {
    console.error(`Your xpaths document is not valid: ${JSON.stringify(errors)}`);
    process.exit(1);
}

const xmlFile = loadOrExit(
    `read the XML file "${spec.source}"`,
    () => fs.readFileSync(spec.source, "utf8"));

// The MIME type is passed explicitly: newer @xmldom/xmldom releases reject a
// call without it, and older releases treat "text/xml" like the default.
const xmlDOM = loadOrExit(
    `parse the XML file "${spec.source}"`,
    () => new dom().parseFromString(xmlFile, "text/xml"));

// `namespaces` is optional in the specification; default to "no prefixes".
const namespaces = spec.namespaces || {};
// The namespace-aware select function only depends on the prefix mapping,
// so it is created once and reused for base and sub-expressions.
const xpathNS = xpath.useNamespaces(namespaces);

/**
 * Evaluates `expr` with `contextNode` as the context node.
 * An invalid expression is reported on stderr and terminates the process
 * with exit code 1.
 *
 * @param {string} expr - the XPath expression.
 * @param {Node} contextNode - the node the expression is evaluated against.
 * @returns {Array|string|number|boolean} an array of nodes for node-set
 *          results, otherwise the primitive value of the expression.
 */
function evaluateOrExit(expr, contextNode) {
    try {
        return xpathNS(expr, contextNode);
    } catch (error) {
        console.error(`${error}`);
        process.exit(1);
    }
}

// A specification without `xpaths` simply has nothing to evaluate.
(spec.xpaths || []).forEach(xpathSpec => {
    const baseExpr = xpathSpec["expr"];
    const subExpr = xpathSpec["subExpr"];
    console.log(`Evaluating "${baseExpr}":`);

    const baseResult = evaluateOrExit(baseExpr, xmlDOM);
    if (!Array.isArray(baseResult)) {
        // String, number or boolean result (e.g. `count(//x)`).
        console.log(baseResult);
    } else {
        baseResult.forEach((element, i) => {
            if (!subExpr) {
                console.log(`${i+1}: ${element.toString()}\n`);
                return;
            }
            console.log(`${i+1}: Evaluating "${subExpr}" for node "${element.toString()}":\n`);
            // Going through the same select function keeps the namespace
            // handling identical and also supports sub-expressions that do
            // not return nodes (iterating such a result would throw).
            const subResult = evaluateOrExit(subExpr, element);
            if (Array.isArray(subResult)) {
                subResult.forEach(node => console.log(node.toString()));
            } else {
                console.log(subResult);
            }
        });
    }
    console.log(`\n`);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment