Last active
November 29, 2023 23:04
-
-
Save dfkaye/6cc8a9cda513dcf97f7a45ba89f4000b to your computer and use it in GitHub Desktop.
find bare text node siblings in DOM: Node iteration approach
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 27 August 2023 | |
// Find uncontrolled or bare text node siblings in a DOM, | |
// Node iteration approach. | |
// SEE sketch 5 September 2023, XPath approach. | |
// https://gist.github.com/dfkaye/6bf4fa391cec4e9a9c44cc05af504ae5 | |
// There's no official term for bare text nodes. I refer to a text node as
// "uncontrolled" or "bare" when it shares a parent with a sibling element or
// comment. Uncontrolled text nodes are slightly harder to update dynamically,
// independently of their sibling element(s).
// Static text node siblings are, in fact, valid HTML. However, any text node
// with potentially dynamic content independent of any siblings should really be
// the node value of its own parent element.
// The test function below depends on the following three functions - parse, | |
// visit, and report - to turn an HTML or XML string into a DOM element, iterate | |
// elements and comments in childNodes recursively, and report any text nodes | |
// that are siblings to elements or comments. | |
// The visit function is the core algorithm for iterating and examining elements | |
// for nodeType and nodeValue, and calling the report function when a text node | |
// is a sibling to an element or comment. | |
// The report function creates a message as an array that includes a style
// directive for colorful display in the console.
// Coolness factors: | |
// 1. The visit function is surprisingly short, notwithstanding the comments. | |
// 2. The report function needed some help after I added support for comments as | |
// siblings. Once I'd done that, I saw that building the HTML string for the | |
// output was a simpler solution than splitting and parsing an element's | |
// innerHTML. | |
// 3. The test function writes to the console in order to demonstrate how to use
// the style directive to provide some visualization, highlighting text
// nodes in the context of an element's HTML or XML string.
// 31 August 2023 | |
// Completely re-worked the reporting "algorithm" to handle comment nodes. | |
// All prior test cases still passed. | |
// Don't tell me TDD doesn't work. | |
// Software is less about IQ than it is elbow grease. | |
// 31 August 2023 | |
// Simplified the visitor "algorithm" somewhat with simpler sibling check. | |
// 1 September 2023 | |
// Added tests for select, textarea, and XML. | |
// Modified parse to support XML doctype option. | |
// Fixed sibling logic in visitor altogether to report sibling text if not | |
// assigned in the previous iteration, then report the current node if it's | |
// text, else visit the current node (i.e., its childNodes) if it's an element. | |
// 1 September 2023 | |
// Begin working on console assertion support to turn this into a better test | |
// suite with testable output and expected output events for each test. | |
// 2 September 2023 | |
// Completed console assertion support and tests and documentation and comments. | |
// Definitely overkill for a gist. | |
//////////////////////////////////////////////////////////////////////////////// | |
/**
 * Parse a markup string, collect bare text node reports, check the count
 * against `options.expect` with console.assert, and print every report.
 *
 * @param {string} xmlString - HTML or XML source handed to parse().
 * @param {object} [options] - Forwarded to parse(); `expect` is the number of
 *   reports the caller anticipates. Defaults to an empty object so a call
 *   without options fails an assertion instead of throwing a TypeError.
 * @returns {undefined}
 */
function test(xmlString, options = {}) {
  const dom = parse(xmlString, options);
  const results = visit(dom);

  // 1 September 2023
  // Assertion helper: complain if expect is not specified as a number.
  const { expect } = options;

  console.assert(
    Number(expect) === expect,
    `"expect" not specified as a number: "${expect}".`
  );

  // Assert asserts assertion...
  console.assert(
    expect === results.length,
    `expected ${expect} but saw ${results.length}`
  );

  // Each result is [format string, CSS, rest], so it is spread into a single
  // console.log call to get one styled line of output per report.
  results.forEach((result) => console.log(...result));
}
/**
 * Process xmlString into a DOM element.
 *
 * @param {string} xmlString - Markup to parse; coerced to a string and trimmed.
 * @param {object} [options]
 * @param {string} [options.append] - When non-empty, a text node
 *   ` ${append} appended ` is appended to the returned element.
 * @param {string} [options.prepend] - When non-empty, a text node
 *   ` ${prepend} prepended ` is prepended to the returned element.
 * @param {string} [options.type="text/html"] - MIME type given to DOMParser.
 * @returns {Element} `doc.body` for "text/html", otherwise the document's
 *   first element child.
 */
function parse(xmlString, options = {}) {
  const { append = '', prepend = '', type = "text/html" } = options;
  const xml = String(xmlString).trim();
  const doc = new DOMParser().parseFromString(xml, type);

  // Loose comparison kept on purpose so a non-primitive `type` resolves the
  // same way it always has.
  const dom = type != "text/html" ? doc.firstElementChild : doc.body;

  // Surround prepend or append text with spaces so fragments stand out.
  if (prepend) {
    dom.prepend(new Text(` ${prepend} prepended `));
  }

  if (append) {
    dom.append(new Text(` ${append} appended `));
  }

  return dom;
}
/**
 * The visit function is the core algorithm, iterating an element's childNodes,
 * looking for text nodes that share a parent with other nodes, and passing
 * each such text node to the report function.
 *
 * Only text containing at least one non-space character is reported. A text
 * node that is the sole child of its parent is never reported. Child elements
 * are visited recursively, and their reports are added in document order.
 *
 * @param {Element} element - Element whose childNodes are examined.
 * @returns {Array} One entry per bare text node, each being whatever
 *   report(element, textNode, index) returned.
 */
function visit(element) {
  // 31 August 2023
  // element.normalize() is deliberately NOT called here, so that text nodes
  // prepended or appended programmatically are still detected.

  const results = [];

  // Matches a value that has at least one non-space character.
  const word = /\S/;

  // The text node reported most recently; used to avoid reporting a node a
  // second time when it shows up as the next node's previousSibling.
  let reported = null;

  for (const [index, node] of Array.from(element.childNodes).entries()) {
    // Object(null) yields an empty object, so nodeType and nodeValue can be
    // read without a null check when there is no previous sibling.
    const sibling = Object(node.previousSibling);

    // 1. Report sibling text with word characters if it's not the text from
    //    the previous iteration, passing the inferred previous index
    //    (index - 1), and **continue to the next condition**.
    if (
      sibling.nodeType === 3 &&
      word.test(sibling.nodeValue) &&
      sibling !== reported
    ) {
      reported = sibling;
      results.push(report(element, sibling, index - 1));
    }

    // 2.A Either report this node when it is text with word characters and
    //     it has a previous sibling of any node type...
    if (node.nodeType === 3 && word.test(node.nodeValue) && sibling.nodeType) {
      reported = node;
      results.push(report(element, node, index));
      continue;
    }

    // 2.B ...or visit this node's children if it's an element.
    if (node.nodeType === 1) {
      for (const result of visit(node)) {
        results.push(result);
      }
    }
  }

  return results;
}
/**
 * Build the console message for one bare text node.
 *
 * 31 August 2023: instead of splitting innerHTML on the text value (which a
 * comment may also contain), the HTML to display is rebuilt from the
 * element's child nodes, with comment nodes given back their delimiters.
 *
 * @param {Element} element - Parent of the text node.
 * @param {Text} textNode - The bare text node being reported.
 * @param {number} index - Position of textNode within element.childNodes.
 * @returns {string[]} Three strings meant to be spread into a console call:
 *   the title plus markup up to and including "%c" and the text, the CSS
 *   applied from "%c" onwards, and the markup that follows the text.
 */
function report(element, textNode, index) {
  const text = textNode.nodeValue;

  const html = Array.from(element.childNodes, (node) => {
    // Only elements have outerHTML; text and comment nodes have nodeValue.
    const value = node.outerHTML || node.nodeValue;

    // Comment nodes do not include their delimiters as part of their value.
    return node.nodeType === 8 ? "<!--" + value + "-->" : value;
  });

  // Cut at the given index only when that child really carries the text;
  // otherwise fall back to the start.
  const cut = html[index] == text ? index : 0;

  const title = `<${element.nodeName}> `.concat(
    `element contains a bare text node at [${index}], "${text}":\n`
  );

  // outerHTML is opening tag + innerHTML + closing tag. Slice by position
  // rather than splitting on innerHTML: a split breaks when the inner markup
  // also occurs inside the tag itself (e.g. "ab" inside "<label>").
  const outer = element.outerHTML;
  const inner = element.innerHTML;
  const closeAt = outer.lastIndexOf("</");
  const closingTag = closeAt === -1 ? "" : outer.slice(closeAt);
  const tag = outer.slice(0, outer.length - closingTag.length - inner.length);

  const fragment = tag.concat(html.slice(0, cut).join(""), "%c", text);
  const rest = html.slice(cut + 1).concat(closingTag).join("");

  return [title.concat(fragment), "background: pink;", rest];
}
/* | |
// 31 August 2023 | |
// deprecated reporting "algorithm", | |
// preserved for posterity so you can judge for yourself... | |
function report(element, text, i) { | |
var parts = element.innerHTML.split(text.nodeValue); | |
var multi = parts.length > 2; | |
var [ a, ...b ] = element.innerHTML.split(text.nodeValue); | |
// 28 August 2023 | |
// Handle the case where a duplicate match later in the node text | |
// should be highlighted instead of the first match. | |
if (multi) { | |
// Remove empty strings except at the end, so we don't inject the node | |
// value when we don't need it: we want ">aa" and not ">aaaa", e.g. | |
a = parts.slice(0, i) | |
.filter((v, k, a) => v || k < a.length - 1) | |
.join(text.nodeValue); | |
// Remove empty string at the beginning of the "rest" fragment. | |
b = parts.slice(i) | |
.filter((v, k) => v || k); | |
} | |
var title = `element contains dangling text node, "${text.nodeValue}":\n`; | |
// The message is an array of three strings: | |
// | |
// 1. title plus the style directive and the identified text node, | |
// 2. the CSS style to be applied, | |
// 3. HTML that follows the identified text node. | |
// | |
// For that reason we use console[method].apply, so that all parts of | |
// the message display in a single output. | |
// | |
// Note that style is applied only to the text after the directive, but not | |
// to the "rest" part. | |
var message = [title + a.concat("%c" + text.nodeValue), "background: pink"]; | |
// Append "rest" only if it's not empty (to avoid "<empty string>" displaying | |
// in the console... | |
var rest = multi | |
? b.join(text.nodeValue) | |
: b.join(""); | |
if (rest) { | |
message.push(rest); | |
} | |
console.warn.apply(console, message); | |
} | |
*/ | |
//////////////////////////////////////////////////////////////////////////////// | |
// | |
// Tests start here, laid out in console groups and IIFEs (immediately invoked | |
// function expressions). | |
// | |
//////////////////////////////////////////////////////////////////////////////// | |
// Bare text at different depths of a nested form: directly inside the outer
// element (before / after its child) and inside a nested fieldset (amidst).
console.group("depth");
(function () {
  console.group("before");
  (function () {
    const before = `
<div name="test"> *BEFORE*
<form name="F">
<fieldset name="FS">
<legend>FS</legend>
<label for="name">name</label>
<input id="name" name="name" value="">
</fieldset>
</form>
</div>
`;

    test(before, { expect: 1 });
    /*
    <DIV> element contains a bare text node at [0], " *BEFORE*
    ":
    <div name="test"> *BEFORE*
    <form name="F">
    <fieldset name="FS">
    <legend>FS</legend>
    <label for="name">name</label>
    <input id="name" name="name" value="">
    </fieldset>
    </form>
    </div>
    */
  })();
  console.groupEnd("before");

  console.group("after");
  (function () {
    const after = `
<div name="test">
<form name="F">
<fieldset name="FS">
<legend>FS</legend>
<label for="name">name</label>
<input id="name" name="name" value="">
</fieldset>
</form>
after
</div>
`;

    test(after, { expect: 1 });
    /*
    <DIV> element contains a bare text node at [2], "
    after
    ":
    <div name="test">
    <form name="F">
    <fieldset name="FS">
    <legend>FS</legend>
    <label for="name">name</label>
    <input id="name" name="name" value="">
    </fieldset>
    </form>
    after
    </div>
    */
  })();
  console.groupEnd("after");

  console.group("amidst");
  (function () {
    const amidst = `
<div name="test">
<form name="F">
<fieldset name="FS">
<legend>FS</legend>
amidst
<label for="name">name</label>
<input id="name" name="name" value="">
</fieldset>
</form>
</div>
`;

    test(amidst, { expect: 1 });
    /*
    <FIELDSET> element contains a bare text node at [2], "
    amidst
    ":
    <fieldset name="FS">
    <legend>FS</legend>
    amidst
    <label for="name">name</label>
    <input id="name" name="name" value="">
    </fieldset>
    */
  })();
  console.groupEnd("amidst");
})();
console.groupEnd("depth");
// Text on both sides of a single element: both text nodes are bare.
console.group("before and after");
(function () {
  test("before <a>and</a> after", { expect: 2 });
  /*
  <BODY> element contains a bare text node at [0], "before ":
  <body>before <a>and</a> after</body>
  <BODY> element contains a bare text node at [2], " after":
  <body>before <a>and</a> after </body>
  */
})();
console.groupEnd("before and after");
// Bare text at two levels: reports for the nested <a> appear between the two
// reports for <body>, i.e. in document order.
console.group("nested");
(function () {
  test("a <a>aa <span>aaa</span> ab</a> c", { expect: 4 });
  /*
  <BODY> element contains a bare text node at [0], "a ":
  <body>a <a>aa <span>aaa</span> ab</a> c</body>
  <A> element contains a bare text node at [0], "aa ":
  <a>aa <span>aaa</span> ab</a>
  <A> element contains a bare text node at [2], " ab":
  <a>aa <span>aaa</span> ab </a>
  <BODY> element contains a bare text node at [2], " c":
  <body>a <a>aa <span>aaa</span> ab</a> c </body>
  */
})();
console.groupEnd("nested");
// Leading and trailing spaces of the input are trimmed by parse(); the inner
// spaces remain part of the reported text nodes.
console.group("spaces around");
(function () {
  test(" aa <b><!-- spaces --></b> aa ", { expect: 2 });
  /*
  <BODY> element contains a bare text node at [0], "aa ":
  <body>aa <b><!-- spaces --></b> aa</body>
  <BODY> element contains a bare text node at [2], " aa":
  <body>aa <b><!-- spaces --></b> aa </body>
  */
})();
console.groupEnd("spaces around");
console.group("ignore spaces only");
(function () {
  // 2 September 2023
  // Text nodes made only of spaces should be ignored.
  console.group("elements");
  (function () {
    test(` <a> </a> `, { expect: 0 });
    // no output
  })();
  console.groupEnd("elements");

  console.group("comments");
  (function () {
    test(` <!-- --> `, { expect: 0 });
    // no output
  })();
  console.groupEnd("comments");
})();
console.groupEnd("ignore spaces only");
console.group("duplicate");
(function () {
  // 28 August 2023
  // Handle duplicate text node value occurrences in an element.
  test("duplicate<b><!-- no spaces --></b>duplicate", { expect: 2 });
  /*
  <BODY> element contains a bare text node at [0], "duplicate":
  <body>duplicate <b><!-- no spaces --></b>duplicate</body>
  <BODY> element contains a bare text node at [2], "duplicate":
  <body>duplicate<b><!-- no spaces --></b>duplicate </body>
  */
})();
console.groupEnd("duplicate");
console.group("more than two duplicates");
(function () {
  // 28 August
  // More than two occurrences of the same text value...
  test("aa<b> 1 </b>aa<b> 2 </b>aa<b> 3 </b>aa<b> 4 </b>aa", { expect: 5 });
  /*
  <BODY> element contains a bare text node at [0], "aa":
  <body>aa <b> 1 </b>aa<b> 2 </b>aa<b> 3 </b>aa<b> 4 </b>aa</body>
  <BODY> element contains a bare text node at [2], "aa":
  <body>aa<b> 1 </b>aa <b> 2 </b>aa<b> 3 </b>aa<b> 4 </b>aa</body>
  <BODY> element contains a bare text node at [4], "aa":
  <body>aa<b> 1 </b>aa<b> 2 </b>aa <b> 3 </b>aa<b> 4 </b>aa</body>
  <BODY> element contains a bare text node at [6], "aa":
  <body>aa<b> 1 </b>aa<b> 2 </b>aa<b> 3 </b>aa <b> 4 </b>aa</body>
  <BODY> element contains a bare text node at [8], "aa":
  <body>aa<b> 1 </b>aa<b> 2 </b>aa<b> 3 </b>aa<b> 4 </b>aa </body>
  */
})();
console.groupEnd("more than two duplicates");
console.group("comments bug");
(function () {
  // 30 August 2023
  // Comment nodes whose value repeats the surrounding text...
  // 31 August 2023
  // FIXED
  test("aa<!-- aa -->aa", { expect: 2 });
  /*
  <BODY> element contains a bare text node at [0], "aa":
  <body>aa <!-- aa -->aa</body>
  <BODY> element contains a bare text node at [2], "aa":
  <body>aa<!-- aa -->aa </body>
  */
})();
// Label matches the console.group("comments bug") call that opened the group.
console.groupEnd("comments bug");
// Three text nodes separated by two comments: all three are bare.
console.group("multiple comments");
(function () {
  test("bb <!-- bb bb --> bb <!-- bb bb --> bb ", { expect: 3 });
  /*
  <BODY> element contains a bare text node at [0], "bb ":
  <body>bb <!-- bb bb --> bb <!-- bb bb --> bb</body>
  <BODY> element contains a bare text node at [2], " bb ":
  <body>bb <!-- bb bb --> bb <!-- bb bb --> bb</body>
  <BODY> element contains a bare text node at [4], " bb":
  <body>bb <!-- bb bb --> bb <!-- bb bb --> bb </body>
  */
})();
console.groupEnd("multiple comments");
console.group("body appended");
(function () {
  // 31 August 2023
  // Top level appended text node.
  test("aa<!-- aa -->aa ", { append: "aa", expect: 3 });
  /*
  <BODY> element contains a bare text node at [0], "aa":
  <body>aa <!-- aa -->aa aa appended </body>
  <BODY> element contains a bare text node at [2], "aa ":
  <body>aa<!-- aa -->aa aa appended </body>
  <BODY> element contains a bare text node at [3], "aa appended ":
  <body>aa<!-- aa -->aa aa appended </body>
  */
})();
console.groupEnd("body appended");
console.group("body prepended");
(function () {
  // 31 August 2023
  // Top level prepended text node.
  test("aa<!-- aa -->aa ", { prepend: "aa", expect: 3 });
  /*
  <BODY> element contains a bare text node at [0], "aa prepended ":
  <body>aa prepended aa<!-- aa -->aa </body>
  <BODY> element contains a bare text node at [1], "aa":
  <body>aa prepended aa <!-- aa -->aa </body>
  <BODY> element contains a bare text node at [3], "aa ":
  <body>aa prepended aa<!-- aa -->aa </body>
  */
})();
// Label matches the console.group("body prepended") call that opened the group.
console.groupEnd("body prepended");
console.group("select options");
(function () {
  // 1 September 2023
  // Text nodes placed directly inside a select, around an option.
  test("<select> hmm <option>text</option> uh oh </select>", { expect: 2 });
  /*
  Indexes are positions in the select's childNodes: the leading text is
  child 0 (reported as the option's previous sibling) and the trailing text
  is child 2.

  <SELECT> element contains a bare text node at [0], " hmm ":
  <select> hmm <option>text</option> uh oh </select>
  <SELECT> element contains a bare text node at [2], " uh oh ":
  <select> hmm <option>text</option> uh oh </select>
  */
})();
console.groupEnd("select options");
console.group("textarea");

console.group("OK");
(function () {
  // 1 September 2023
  // The text inside a textarea has no siblings, so nothing is reported.
  test("<textarea> text area </textarea>", { expect: 0 });
  // No output
})();
console.groupEnd("OK");

console.group("textarea appended");
(function () {
  // THIS DETECTED A PROBLEM WITH NEXT SIBLING.
  test("<textarea> text area </textarea>", { append: "afterwards", expect: 1 });
  /*
  <BODY> element contains a bare text node at [1], " afterwards appended ":
  <body><textarea> text area </textarea> afterwards appended </body>
  */
})();
console.groupEnd("textarea appended");

console.group("textarea prepended");
(function () {
  test("<textarea> text area </textarea>", { prepend: "beforehand", expect: 1 });
  /*
  The prepended text is the body's first child, so it is reported at [0].

  <BODY> element contains a bare text node at [0], " beforehand prepended ":
  <body> beforehand prepended <textarea> text area </textarea></body>
  */
})();
console.groupEnd("textarea prepended");

console.groupEnd("textarea");
console.group("XML");
(function () {
  // 1 September 2023
  // XML: parsed with type "text/xml", so parse() returns the document's first
  // element child (<note>) rather than a body.
  const note = `
<?xml version="1.0" encoding="UTF-8" ?>
<!-- this is a note -->
<note>
<!-- this is the content of the note -->
<to>recipient</to>
<from>sender</from>
<heading>subject matter</heading>
<body>body of this note :)</body>
</note>
`;

  console.group("OK");
  (function () {
    test(note, { type: "text/xml", expect: 0 });
    // No output
  })();
  console.groupEnd("OK");

  console.group("XML appended");
  (function () {
    test(note, { type: "text/xml", append: "afterwards", expect: 1 });
    /*
    <note> element contains a bare text node at [11], " afterwards appended ":
    <note>
    <!-- this is the content of the note -->
    <to>recipient</to>
    <from>sender</from>
    <heading>subject matter</heading>
    <body>body of this note :)</body>
    afterwards appended </note>
    */
  })();
  console.groupEnd("XML appended");
})();
console.groupEnd("XML");
// The opening tag shown in the report keeps the element's attributes.
console.group("nodeName plus attributes");
(function () {
  test("<p data-attribute>before <a>and</a> after</p>", { expect: 2 });
  /*
  <P> element contains a bare text node at [0], "before ":
  <p data-attribute="">before <a>and</a> after</p>
  <P> element contains a bare text node at [2], " after":
  <p data-attribute="">before <a>and</a> after </p>
  */
})();
console.groupEnd("nodeName plus attributes");
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment