Skip to content

Instantly share code, notes, and snippets.

@dfkaye
Created November 29, 2023 23:03
Show Gist options
  • Save dfkaye/6bf4fa391cec4e9a9c44cc05af504ae5 to your computer and use it in GitHub Desktop.
Save dfkaye/6bf4fa391cec4e9a9c44cc05af504ae5 to your computer and use it in GitHub Desktop.
find bare text node siblings in DOM: XPath approach
// 5 September 2023
// Find uncontrolled or bare text node siblings in a DOM,
// XPath approach.
// SEE sketch 27 August 2023, Node iteration approach.
// https://gist.github.com/dfkaye/6cc8a9cda513dcf97f7a45ba89f4000b
// 6 September 2023
// I have to say,
// 1. Overall, this solution feels better than the node iteration version,
// 2. XPath selectors take time to get right,
// 3. the visit() function is far shorter than the node iteration version,
// 4. the test() function uses its own console styles as well, but... meh,
// 5. the report() function is a bit longer than the node iteration version due
// to the branching logic,
// 6. it's easy to go overboard with Object.assign and friends ;) - I spent way
// too much time shaving message construction and test output logic yaks.
/**
 * Run one fixture: parse its HTML, swap it into the live document body, look
 * for bare text nodes, and print a styled summary to the console.
 *
 * NOTE: this replaces every child of `document.body` with the parsed fixture.
 *
 * @param {{ name: string, text: string }} fixture
 *   `name` labels the console output; `text` is the HTML source to inspect.
 * @returns {void}
 */
function test({ name, text }) {
  // Parse the fixture, then move the parsed body's children into the current
  // document so the XPath query runs against this document's nodes.
  const parsed = new DOMParser().parseFromString(text.trim(), "text/html");
  document.body.replaceChildren(...parsed.body.childNodes);

  const messages = report(visit(document.body));

  if (messages.length === 0) {
    console.log(
      `\u{1F600} %cNo bare text nodes in ${name} test.`,
      "background: lightgreen; padding: 2px;"
    );
    return;
  }

  console.log(
    `\u{1F626} %c${messages.length} bare text nodes in ${name} test.`,
    "background: pink; padding: 2px;"
  );

  messages.forEach((message, index) => {
    // Each message is an object with start, style, and rest fields, in that
    // order. They become the console.log arguments; the start text (the
    // format string holding the %c directive) is prefixed with the 1-based
    // position of the message. The message object itself is left untouched.
    const args = Object.keys(message).map((key) =>
      key === "start" ? `${index + 1}: ${message[key]}` : message[key]
    );

    console.log(...args);
  });
}
/**
 * Select every element at or below `node` that has at least one child node
 * and whose children are not exclusively text nodes, i.e. elements in which a
 * text node could be sitting next to a non-text sibling.
 *
 * @param {Node} node - context node the search is scoped to.
 * @returns {XPathResult} the result of `document.evaluate`, requested with
 *   `XPathResult.ANY_TYPE`; callers walk it with `iterateNext()`.
 */
function visit(node) {
  // `descendant-or-self::*` is evaluated relative to the context node. The
  // previous `//*` form is an absolute path: it always starts at the document
  // root, so the `node` argument had no effect on which elements were found.
  const xpath =
    "descendant-or-self::*[count(./node()) > 0][count(./text()) != count(./node())]";

  // The expression uses no namespace prefixes, so no resolver is needed.
  const namespaceResolver = null;

  return document.evaluate(
    xpath,
    node,
    namespaceResolver,
    XPathResult.ANY_TYPE,
    null
  );
}
/**
 * Build one console message per non-blank text node that is a direct child of
 * an element yielded by `set`.
 *
 * Each message has three fields, in this order:
 *   - start: title line, the element's opening tag, the markup preceding the
 *            text node, then "%c" followed by the text value;
 *   - style: the CSS applied from the "%c" directive onward;
 *   - rest:  the markup following the text node plus the closing tag.
 *
 * The text node is located by searching the element's innerHTML for the node
 * value. NOTE: that search is purely textual, so a value that also appears
 * inside a child element's markup is counted as an occurrence too.
 *
 * @param {{ iterateNext: function(): (Element|null) }} set
 *   iterator of elements, e.g. the XPathResult returned by visit().
 * @returns {Array<{ start: string, style: string, rest: string }>}
 */
function report(set) {
  const TEXT_NODE = 3;
  const messages = [];
  let element;

  while ((element = set.iterateNext())) {
    const inner = element.innerHTML;
    const [openTag, closeTag] = splitWrapperTags(element.outerHTML, inner);
    const label = `<${element.nodeName}> contains a bare text sibling`;

    // How many times each repeated text value has been reported so far in
    // this element. Counting per value (rather than with one shared counter)
    // keeps "n of m" correct when several different values repeat.
    const seen = new Map();

    for (const node of element.childNodes) {
      if (node.nodeType !== TEXT_NODE || !/\S/.test(node.nodeValue)) {
        continue;
      }

      const value = node.nodeValue;
      const parts = inner.split(value);
      const total = parts.length - 1;

      if (total === 1) {
        // Exactly one occurrence: the split yields the markup before and
        // after the text node directly.
        const [before, after] = parts;

        messages.push({
          start: `${label}, "${value}"\n` + openTag + before + "%c" + value,
          style: "background: lightskyblue;",
          rest: after + closeTag
        });
        continue;
      }

      const nth = (seen.get(value) || 0) + 1;
      seen.set(value, nth);

      if (nth > total) {
        // The value does not appear verbatim in the serialized markup often
        // enough to place this node (for instance because the serializer
        // escaped a character such as "&"). Report the node with the whole
        // element as context instead of inventing a position for it.
        messages.push({
          start:
            `${label}, "${value}" (not found verbatim in the serialized markup).\n` +
            openTag + inner + closeTag + "\n%c" + value,
          style: "background: khaki;",
          rest: ""
        });
        continue;
      }

      // Several occurrences: rejoin the pieces on either side of the nth one
      // so the start text grows, and the rest text shrinks, with each report.
      messages.push({
        start:
          `${label}, "${value}" (${nth} of ${total} occurrances).\n` +
          openTag + parts.slice(0, nth).join(value) + "%c" + value,
        style: "background: aqua;",
        rest: parts.slice(nth).join(value) + closeTag
      });
    }
  }

  return messages;
}

/**
 * Split an element's serialized markup into its opening and closing tags.
 *
 * Works backwards from the last "</" in `outer` instead of splitting `outer`
 * on `inner`, because `inner` may also occur inside the opening tag (for
 * example in an attribute value), which would split in the wrong place.
 *
 * @param {string} outer - the element's outerHTML.
 * @param {string} inner - the element's innerHTML.
 * @returns {[string, string]} `[openTag, closeTag]`; when `outer` does not
 *   have the shape open + inner + close, returns `[outer, ""]`.
 */
function splitWrapperTags(outer, inner) {
  const closeAt = outer.lastIndexOf("</");
  const openEnd = closeAt - inner.length;

  if (closeAt !== -1 && openEnd >= 0 && outer.slice(openEnd, closeAt) === inner) {
    return [outer.slice(0, openEnd), outer.slice(closeAt)];
  }

  return [outer, ""];
}
/* Fixtures and runner. */

// Fixture whose <body>, <main>, <article>, <aside>, and <footer> each hold
// text directly alongside element siblings. The <footer> repeats the same
// text several times.
var bareText = `
<head><title> * title * </title></head>
<body>
first
<main>
one <a>main link one </a> two.
<article>
three <a>article link </a> four.
</article>
<!-- comment -->
<aside>
five <a>aside link </a> six.
</aside>
seven <a>main link seven two </a> eight.
</main>
<footer>
AA <b>i</b> AA <b>i</b> AA <b>i</b> AA <b>i</b> ZZ <i>i</i> AA <b>i</b> AA
</footer>
last <b> b </b> last
</body>
`;

// Fixture in which every piece of visible text is wrapped in its own element.
var noBareText = `
<head><title> * title * </title></head>
<body>
<h1>first</h1>
<main>
<b>one</b> <a>main link one </a> <b>two.</b>
<article>
<b>three</b> <a>article link </a> <b>four.</b>
</article>
<!-- comment -->
<aside>
<b>five</b> <a>aside link </a> <b>six.</b>
</aside>
<b>seven</b> <a>main link seven two </a> <b>eight.</b>
</main>
<b> b </b>
</body>
`;

// Each entry is handed to test() as-is: `name` labels the output and `text`
// is the HTML to inspect.
var tests = [
  { name: "bareText", text: bareText },
  { name: "noBareText", text: noBareText },
];

// Run the fixtures in order.
for (const fixture of tests) {
  test(fixture);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment