Created
November 29, 2023 23:03
-
-
Save dfkaye/6bf4fa391cec4e9a9c44cc05af504ae5 to your computer and use it in GitHub Desktop.
find bare text node siblings in DOM: XPath approach
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 5 September 2023
// Find uncontrolled or bare text node siblings in a DOM,
// XPath approach.
// SEE sketch 27 August 2023, Node iteration approach.
// https://gist.github.com/dfkaye/6cc8a9cda513dcf97f7a45ba89f4000b
// 6 September 2023
// I have to say,
// 1. Overall, this solution feels better than the node iteration version,
// 2. XPath selectors take time to get right,
// 3. the visit() function is far shorter than the node iteration version,
// 4. the test() function uses its own console styles as well, but... meh,
// 5. the report() function is a bit longer than the node iteration version due
//    to the branching logic,
// 6. it's easy to go overboard with Object.assign and friends ;) - I spent way
//    too much time shaving message construction and test output logic yaks.
/**
 * Run one fixture: parse its HTML, swap it into the live document body, look
 * for bare text nodes, and print the outcome to the console.
 *
 * Relies on the globals `DOMParser` and `document`, and on the sibling
 * functions `visit()` and `report()`.
 *
 * @param {{ name: string, text: string }} fixture
 *   `name` labels the console output; `text` is the HTML source to parse.
 * @returns {void}
 */
function test({ name, text }) {
  // parse
  const dom = new DOMParser().parseFromString(text.trim(), "text/html");

  document.body.replaceChildren(...dom.body.childNodes);

  const contentNode = document.body;
  const messages = report(visit(contentNode));

  if (messages.length === 0) {
    console.log(
      `\u{1F600} %cNo bare text nodes in ${name} test.`,
      "background: lightgreen; padding: 2px;"
    );
    return;
  }

  console.log(
    `\u{1F626} %c${messages.length} bare text nodes in ${name} test.`,
    "background: pink; padding: 2px;"
  );

  messages.forEach((message, index) => {
    // Each message is an object with start, style, and (optionally) rest
    // fields. They become the console.log arguments in key order, with the
    // start text prefixed by the 1-based position of the message. A fresh
    // array is built so the objects returned by report() are left untouched.
    const output = Object.entries(message).map(([key, value]) =>
      key === "start" ? `${index + 1}: ${value}` : value
    );

    console.log(...output);
  });
}
/**
 * Select the elements at or below `node` that could hold bare text siblings,
 * i.e. elements that have child nodes which are not all text nodes.
 *
 * @param {Node} node - context node the search is confined to.
 * @returns {XPathResult} an ordered node iterator; consume it with
 *   `iterateNext()`.
 */
function visit(node) {
  // visit
  // - descendant-or-self::* keeps the search inside the context node (and
  //   includes the node itself); a leading `//` would restart from the
  //   document root and ignore the node that was passed in.
  // - [count(./node()) > 0]: the element has at least one child node.
  // - [count(./text()) != count(./node())]: not every child is a text node,
  //   so text-only elements such as <title> are skipped.
  const xpath =
    "descendant-or-self::*[count(./node()) > 0][count(./text()) != count(./node())]";
  const contentNode = node;

  // Evaluate against the document that owns the node; a Document node has no
  // ownerDocument and is used directly.
  const owner = contentNode.ownerDocument || contentNode;

  // The expression uses no namespace prefixes, so no resolver is needed.
  const namespaceResolver = null;

  // Ask for an ordered iterator explicitly so matches arrive in document order.
  const resultType = XPathResult.ORDERED_NODE_ITERATOR_TYPE;
  const result = null;

  return owner.evaluate(xpath, contentNode, namespaceResolver, resultType, result);
}
/**
 * Return a placeholder string that does not occur in `html`, so that it can
 * be located unambiguously after being written into a clone of the markup.
 *
 * @param {string} html
 * @returns {string}
 */
function markerAbsentFrom(html) {
  let attempt = 0;
  let marker = `\u{E000}${attempt}\u{E001}`;

  while (html.includes(marker)) {
    attempt += 1;
    marker = `\u{E000}${attempt}\u{E001}`;
  }

  return marker;
}

/**
 * Turn the elements yielded by `set` into console messages, one per bare text
 * node (a child text node containing at least one non-whitespace character).
 *
 * Each message has three fields meant to be passed to console.log in order:
 * - start: a title line, then the element's markup up to and including the
 *   bare text, with "%c" placed directly before that text;
 * - style: the CSS applied from "%c" onwards;
 * - rest:  the remainder of the element's markup.
 *
 * When several bare text children of one element share the same value, the
 * title says which occurrence this is ("n of m") and a different background
 * is used. Occurrences are counted per text value among the element's own
 * bare text children.
 *
 * @param {{ iterateNext: function(): (Element|null) }} set - node iterator,
 *   e.g. the XPathResult returned by visit().
 * @returns {Array<{ start: string, style: string, rest: string }>}
 */
function report(set) {
  // report
  const TEXT_NODE = 3;
  const messages = [];
  let element;

  while ((element = set.iterateNext())) {
    // First pass: collect the bare text children and count each value.
    const bare = [];
    const totals = new Map();

    Array.from(element.childNodes).forEach((child, index) => {
      if (child.nodeType !== TEXT_NODE || !/\S/.test(child.nodeValue)) {
        return;
      }

      bare.push({ index, value: child.nodeValue });
      totals.set(child.nodeValue, (totals.get(child.nodeValue) || 0) + 1);
    });

    if (bare.length === 0) {
      continue;
    }

    // Split the outer markup around the inner markup. The element's own
    // closing tag is the last "</" in its outerHTML, so the split stays
    // correct even when the inner markup also appears inside a tag
    // (e.g. <p>p</p>).
    const outer = element.outerHTML;
    const inner = element.innerHTML;
    const closingAt = outer.lastIndexOf("</");
    const TAG = outer.slice(0, closingAt - inner.length);
    const CLOSING_TAG = outer.slice(closingAt);

    // All edits happen on a detached clone; the live tree is never touched
    // while the iterator is being consumed.
    const scratch = element.cloneNode(true);
    const marker = markerAbsentFrom(inner);
    const seen = new Map();

    for (const { index, value } of bare) {
      // Swap the text for the placeholder, serialize, then restore it. The
      // markup before and after the placeholder is exactly what surrounds
      // this particular node, whatever escaping the text itself would need
      // and however often the same characters occur elsewhere.
      const target = scratch.childNodes[index];
      target.nodeValue = marker;
      const marked = scratch.innerHTML;
      target.nodeValue = value;

      const cut = marked.indexOf(marker);
      const a = marked.slice(0, cut);
      const b = marked.slice(cut + marker.length);

      const total = totals.get(value);
      const at = (seen.get(value) || 0) + 1;
      seen.set(value, at);

      let title = `<${element.nodeName}> contains a bare text sibling`;
      let style;

      if (total === 1) {
        style = "background: lightskyblue;";
        title += `, "${value}"\n`;
      } else {
        style = "background: aqua;";
        title += `, "${value}" (${at} of ${total} occurrances).\n`;
      }

      messages.push({
        start: `${title}${TAG}${a}%c${value}`,
        style,
        rest: `${b}${CLOSING_TAG}`,
      });
    }
  }

  return messages;
}
/* test it out */

// Fixture with bare text: several elements mix non-whitespace text nodes with
// element siblings, and <footer> repeats the same text several times.
const bareText = `
<head><title> * title * </title></head>
<body>
first
<main>
one <a>main link one </a> two.
<article>
three <a>article link </a> four.
</article>
<!-- comment -->
<aside>
five <a>aside link </a> six.
</aside>
seven <a>main link seven two </a> eight.
</main>
<footer>
AA <b>i</b> AA <b>i</b> AA <b>i</b> AA <b>i</b> ZZ <i>i</i> AA <b>i</b> AA
</footer>
last <b> b </b> last
</body>
`;

// Fixture without bare text: every piece of text is wrapped in its own
// element, leaving only whitespace between siblings.
const noBareText = `
<head><title> * title * </title></head>
<body>
<h1>first</h1>
<main>
<b>one</b> <a>main link one </a> <b>two.</b>
<article>
<b>three</b> <a>article link </a> <b>four.</b>
</article>
<!-- comment -->
<aside>
<b>five</b> <a>aside link </a> <b>six.</b>
</aside>
<b>seven</b> <a>main link seven two </a> <b>eight.</b>
</main>
<b> b </b>
</body>
`;

const tests = [
  { name: "bareText", text: bareText },
  { name: "noBareText", text: noBareText },
];

// Run every fixture through test(), in order.
tests.forEach((item) => test(item));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment