Created
March 4, 2024 08:52
-
-
Save dfkaye/2e5a0a799441eb6c8a2f8d866d86c1c9 to your computer and use it in GitHub Desktop.
generate an XPath expression for an element in an HTML document
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 3 March 2024
// generate an XPath expression for an element in an HTML document
// ONLY WORKS FOR ELEMENTS SO FAR, NO FANCY NODE TYPE SELECTION YET
// for an element tree structure such as
// <a><b><c><d id="test"> this is <e> embedded </e> text </d></c></b></a>
// the XPath expression for window.test (id="test") should be
// //BODY[1]/A[1]/B[1]/C[1]/D[1]
// and passing that to document.evaluate() should return
// <d id="test"> this is <e> embedded </e> text </d>
/**
 * Build an XPath expression that addresses `node` by walking up its chain of
 * parent elements to `document.documentElement`.
 *
 * Each step has the form `/NAME[i]`, where NAME is the element's `nodeName`
 * and `i` is its 1-based position among preceding element siblings that share
 * that name. The root element itself contributes only "/", so the finished
 * expression for any descendant starts with "//" (e.g. `//BODY[1]/A[1]/B[1]`).
 *
 * Only elements are supported; text and other node types are not handled.
 *
 * @param {Element} node - An element that descends from document.documentElement.
 * @returns {string} The XPath expression ("/" when `node` is the root element).
 * @throws {TypeError} If `node` is null/undefined, or if the walk runs out of
 *   parent elements before reaching document.documentElement (detached node).
 */
function xpath(node) {
  if (node == null) {
    throw new TypeError(`xpath(node): expected an element, received ${node}`);
  }

  if (node === document.documentElement) {
    // Prepend an extra "/" to make our expression start from the first element child.
    return "/";
  }

  const parent = node.parentElement;

  if (parent == null) {
    // Without this guard the recursion would fail on `null` with an opaque error.
    throw new TypeError(
      `xpath(node): <${node.nodeName}> has no parent element, so it is not a descendant of document.documentElement`
    );
  }

  // XPath positions are 1-based and count only same-named preceding siblings.
  let position = 1;

  for (
    let sibling = node.previousElementSibling;
    sibling;
    sibling = sibling.previousElementSibling
  ) {
    if (sibling.nodeName === node.nodeName) {
      position += 1;
    }
  }

  return `${xpath(parent)}/${node.nodeName}[${position}]`;
}
// Find element with this function, revised from gist, 5 September 2023,
// "Find uncontrolled or bare text node siblings in a DOM, XPath approach",
// https://gist.github.com/dfkaye/6bf4fa391cec4e9a9c44cc05af504ae5
/**
 * Evaluate an XPath expression against the current `document`.
 *
 * The result is requested as `XPathResult.ANY_UNORDERED_NODE_TYPE`, so callers
 * read the matched node (if any) from the returned object's `singleNodeValue`.
 *
 * @param {string} path - XPath expression to evaluate.
 * @returns {XPathResult} Whatever `document.evaluate()` returns for `path`.
 */
function visit(path) {
  const contextNode = document;
  const namespaceResolver = new XPathEvaluator().createNSResolver(contextNode);
  const resultType = XPathResult.ANY_UNORDERED_NODE_TYPE;
  // Passing null as the last argument supplies no existing result object to reuse.
  const existingResult = null;

  return document.evaluate(
    path,
    contextNode,
    namespaceResolver,
    resultType,
    existingResult
  );
}
/* test it out */
// Browser-only smoke test. For each HTML fixture it replaces the children of
// the live document.body, builds the XPath for the element with id="test",
// evaluates that path, and collects the matched element's outerHTML.
// `var` is kept at the top level so these names remain re-declarable globals.
var parser = new DOMParser();
var tests = [
  `<div><p id="test">some text</p></div>`,
  `<label for="test">a</label><input id="test" value="** test **">`,
  `<a><b><c><d id="test"> this is <e> embedded </e> text </d></c></b></a>`
];
var results = tests.map((html) => {
  // Wrap the fixture in a throwaway <f> element so all of its top-level nodes
  // can be reached through a single parent after parsing.
  const wrapper = parser
    .parseFromString(`<f>${html}</f>`, "text/html")
    .body
    .firstElementChild;

  // need to add get textNode test

  // Move the fixture's nodes into the live document so xpath() and visit()
  // operate on the real `document`.
  document.body.replaceChildren(...wrapper.childNodes);

  // Look the element up explicitly instead of relying on the implicit
  // `window.test` named-element global, which any global `test` can shadow.
  const target = document.getElementById("test");

  if (!target) {
    throw new Error(`fixture has no element with id="test": ${html}`);
  }

  const path = xpath(target);
  console.log({ path });

  const set = visit(path);
  console.warn(set);

  const match = set.singleNodeValue;

  if (!match) {
    throw new Error(`XPath expression matched no node: ${path}`);
  }

  return match.outerHTML;

  // Alternative that iterates over every match (kept from the original notes):
  // var collection = []
  // var element;
  // while (element = set.iterateNext()) {
  //   collection.push(element.outerHTML);
  // }
  // console.assert(
  //   collection.length === 1,
  //   "should be only one element, found " + collection.length
  // );
  // return collection.join("");
});
console.log(JSON.stringify(results, null, 2));
/*
[
  "<p id=\"test\">some text</p>",
  "<input id=\"test\" value=\"** test **\">",
  "<d id=\"test\"> this is <e> embedded </e> text </d>"
]
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment