Skip to content

Instantly share code, notes, and snippets.

@dfkaye
Created March 4, 2024 08:52
Show Gist options
  • Save dfkaye/2e5a0a799441eb6c8a2f8d866d86c1c9 to your computer and use it in GitHub Desktop.
Save dfkaye/2e5a0a799441eb6c8a2f8d866d86c1c9 to your computer and use it in GitHub Desktop.
generate an XPath expression for an element in an HTML document
// 3 March 2024
// generate an XPath expression for an element in an HTML document
// ONLY WORKS FOR ELEMENTS SO FAR, NO FANCY NODE TYPE SELECTION YET
// for an element tree structure such as
// <a><b><c><d id="test"> this is <e> embedded </e> text </d></c></b></a>
// the XPath expression for window.test (id="test") should be
// //BODY[1]/A[1]/B[1]/C[1]/D[1]
// and passing that to document.evaluate() should return
// <d id=\"test\"> this is <e> embedded </e> text </d>
/**
 * Build an XPath expression that addresses `node` by position.
 *
 * Each step has the form `/NAME[i]`, where NAME is the element's `nodeName`
 * and `i` is its 1-based position among the preceding element siblings that
 * share the same `nodeName`. The document's root element itself yields "/",
 * so the path of any descendant starts with "//" followed by the step for the
 * root element's child (e.g. `//BODY[1]/A[1]/B[1]`).
 *
 * The root element is taken from the node's own `ownerDocument`, so nodes that
 * live in a document other than the global `document` are handled as well.
 *
 * @param {Element} node - element to describe.
 * @returns {string} the XPath expression for `node`.
 * @throws {TypeError} if `node` is null/undefined, or if walking up through
 *   `parentElement` runs out of ancestors before reaching the root element
 *   (i.e. the node is detached).
 */
function xpath(node) {
  if (node == null) {
    throw new TypeError(`xpath(node): expected an element, got ${node}`);
  }

  // Prefer the node's own document; fall back to the global document for
  // objects that do not expose `ownerDocument`.
  const owner = node.ownerDocument ? node.ownerDocument : document;
  const root = owner.documentElement;

  const steps = [];
  let current = node;

  while (current !== root) {
    // Count preceding siblings with the same name to get the 1-based index.
    let index = 1;
    let sibling = current.previousElementSibling;
    while (sibling) {
      if (sibling.nodeName === current.nodeName) {
        index++;
      }
      sibling = sibling.previousElementSibling;
    }

    // Walking upwards, so each ancestor's step goes in front.
    steps.unshift(`/${current.nodeName}[${index}]`);

    current = current.parentElement;
    if (current == null) {
      throw new TypeError(
        "xpath(node): node is not attached beneath its document's root element"
      );
    }
  }

  // The leading "/" stands in for the root element, making the expression
  // start from the root element's first-level child.
  return `/${steps.join("")}`;
}
// Find element with this function, revised from gist, 5 September 2023,
// "Find uncontrolled or bare text node siblings in a DOM, XPath approach",
// https://gist.github.com/dfkaye/6bf4fa391cec4e9a9c44cc05af504ae5
/**
 * Evaluate an XPath expression against the global `document`.
 *
 * @param {string} path - XPath expression to evaluate.
 * @returns {XPathResult} the result requested as ANY_UNORDERED_NODE_TYPE;
 *   the matched node (if any) is read from its `singleNodeValue`.
 */
function visit(path) {
  // Namespace resolver derived from the document being searched.
  const resolver = new XPathEvaluator().createNSResolver(document);
  const wantedType = XPathResult.ANY_UNORDERED_NODE_TYPE;

  // The final `null` asks evaluate() to create a fresh result object
  // instead of reusing an existing one.
  return document.evaluate(path, document, resolver, wantedType, null);
}
/*
 * Demo / smoke test.
 *
 * For each HTML snippet: parse it, swap it into the live document body,
 * compute the XPath of the element whose id is "test", look that path up
 * again with visit(), and collect the outerHTML of the node that was found.
 */
var parser = new DOMParser();

// Every snippet contains exactly one element with id="test".
var tests = [
  `<div><p id="test">some text</p></div>`,
  `<label for="test">a</label><input id="test" value="** test **">`,
  `<a><b><c><d id="test"> this is <e> embedded </e> text </d></c></b></a>`
];

var results = tests.map(function (html) {
  // Wrap the snippet in a single <f> element so that all of its top-level
  // nodes can be reached through one parent.
  const parsed = parser.parseFromString(`<f>${html}</f>`, "text/html");
  const wrapper = parsed.body.firstElementChild;

  // TODO: add a test that retrieves a text node.
  // (alternative lookup: wrapper.querySelector("#test"))

  // Replace the current body content with the snippet's nodes.
  document.body.replaceChildren(...wrapper.childNodes);

  // window.test resolves to the element with id="test" now in the document.
  const path = xpath(window.test);
  console.log({ path });

  const found = visit(path);
  console.warn(found);

  // Alternative for an iterator-type result: loop over found.iterateNext(),
  // collect each element's outerHTML, and assert that exactly one matched.
  return found.singleNodeValue.outerHTML;
});

console.log(JSON.stringify(results, null, 2));
/*
[
"<p id=\"test\">some text</p>",
"<input id=\"test\" value=\"** test **\">",
"<d id=\"test\"> this is <e> embedded </e> text </d>"
]
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment