// dfkaye · November 29, 2023 23:03
// diff --git a/find-bare-text-siblings-using-XPath.js b/find-bare-text-siblings-using-XPath.js
 // 5 September 2023
 // Find uncontrolled or bare text node siblings in a DOM,
 // XPath approach.

 // SEE sketch 27 August 2023, Node iteration approach.
 // https://gist.github.com/dfkaye/6cc8a9cda513dcf97f7a45ba89f4000b

 // 6 September 2023
 // I have to say,
 // 1. Overall, this solution feels better than the node iteration version,
 // 2. XPath selectors take time to get right,
 // 3. the visit() function is far shorter than the node iteration version,
 // 4. the test() function uses its own console styles as well, but... meh,
 // 5. the report() function is a bit longer than the node iteration version due
 //    to the branching logic,
 // 6. it's easy to go overboard with Object.assign and friends ;) - I spent way
 //    too much time shaving message construction and test output logic yaks.


 /**
  * Parse an HTML string, swap it into the current document's body, and log
  * every bare text sibling that report() finds there.
  *
  * Relies on the sibling functions visit() and report(), and on the browser
  * globals DOMParser, document and console.
  *
  * @param {{ name: string, text: string }} item - `name` labels the console
  *   output; `text` is the HTML source to parse and inspect.
  * @returns {undefined} Output goes to the console only.
  */
 function test({ name, text }) {
   // Parse the fixture in a separate document, then move its body content
   // into the live page, because visit() is given document.body to search.
   var dom = (new DOMParser()).parseFromString(text.trim(), "text/html");

   document.body.replaceChildren(...dom.body.childNodes);

   var messages = report(visit(document.body));

   if (!messages.length) {
     console.log(
       `\u{1F600} %cNo bare text nodes in ${name} test.`,
       "background: lightgreen; padding: 2px;"
     );

     return;
   }

   console.log(
     `\u{1F626} %c${messages.length} bare text nodes in ${name} test.`,
     "background: pink; padding: 2px;"
   );

   messages.forEach((message, i) => {
     // Each message carries start, style, and (optionally) rest fields, which
     // become console.log's format string, its %c style, and a trailing
     // argument. Only the logged copy of the start text gets the 1-based
     // position prefix; the message object itself is left untouched.
     var output = Object.keys(message).map((key) => {
       return key === "start" ? `${i + 1}: ${message[key]}` : message[key];
     });

     console.log(...output);
   });
 }

 /**
  * Collect every element at or below `node` that has child nodes which are
  * not all text nodes, i.e. elements whose text may sit beside other nodes.
  *
  * @param {Node} node - Context node for the search (an element or a
  *   document).
  * @returns {XPathResult} Node iterator in document order; consume it with
  *   iterateNext().
  */
 function visit(node) {
   // A leading "//" restarts the search at the document root and ignores the
   // context node, so the path is anchored with descendant-or-self instead.
   var xpath = "descendant-or-self::*" +
     "[count(./node()) > 0][count(./text()) != count(./node())]";

   // Evaluate on the document that owns the node; a Document node has no
   // ownerDocument and is used directly.
   var owner = node.ownerDocument || node;

   // The expression uses no namespace prefixes, so no resolver is needed.
   return owner.evaluate(
     xpath,
     node,
     null,
     XPathResult.ORDERED_NODE_ITERATOR_TYPE,
     null
   );
 }

 /**
  * Build one console message per bare text node found among the children of
  * each element in `set`. A bare text node is a text child that contains at
  * least one non-whitespace character.
  *
  * Each message has three fields:
  *   start - title, opening tag, the content before the text node, then "%c"
  *           followed by the text node's value,
  *   style - CSS applied by the "%c" directive,
  *   rest  - the content after the text node plus the closing tag.
  *
  * @param {XPathResult} set - Iterator that yields elements via iterateNext().
  * @returns {Array<{start: string, style: string, rest: string}>}
  */
 function report(set) {
   var messages = [];
   var element;

   // True for text nodes that contain something other than whitespace.
   function isBareText(n) {
     return n.nodeType === 3 && /\S/.test(n.nodeValue);
   }

   // Text as it appears inside innerHTML, where &, <, > and no-break spaces
   // are written as character references.
   function escapeText(value) {
     return value
       .replace(/&/g, "&amp;")
       .replace(/\u00A0/g, "&nbsp;")
       .replace(/</g, "&lt;")
       .replace(/>/g, "&gt;");
   }

   // Markup a child contributes to its parent's innerHTML; "" when unknown.
   function serialize(child) {
     if (child.nodeType === 1) {
       return child.outerHTML;
     }

     if (child.nodeType === 3) {
       return escapeText(child.nodeValue);
     }

     if (child.nodeType === 8) {
       return "<!--" + child.nodeValue + "-->";
     }

     return "";
   }

   // Opening and closing tags, taken from the positions around the content
   // rather than by splitting on it, because the content string can also
   // occur elsewhere in the outer markup (for example in an attribute).
   function tagsOf(outer, inner) {
     var closeAt = outer.lastIndexOf("</");
     var openEnd = closeAt - inner.length;

     if (openEnd > 0 && outer.slice(openEnd, closeAt) === inner) {
       return [outer.slice(0, openEnd), outer.slice(closeAt)];
     }

     var parts = outer.split(inner);

     return [parts[0], parts.length > 1 ? parts[parts.length - 1] : ""];
   }

   while ((element = set.iterateNext())) {
     var children = Array.from(element.childNodes);
     var inner = element.innerHTML;
     var tags = tagsOf(element.outerHTML, inner);

     // How many bare text siblings share each value, so repeated values can
     // be labelled "k of n".
     var totals = new Map();

     children.forEach(function (n) {
       if (isBareText(n)) {
         totals.set(n.nodeValue, (totals.get(n.nodeValue) || 0) + 1);
       }
     });

     var seen = new Map();

     // Offset in `inner` where the current child's markup begins. Walking the
     // children in order ties every text node to its own position, even when
     // the same characters also occur earlier, later, or inside a sibling.
     var cursor = 0;

     children.forEach(function (n) {
       var piece = serialize(n);
       var index = inner.indexOf(piece, cursor);

       if (index < 0 && n.nodeType === 3) {
         // Some parents (script or style, per the HTML serialization rules)
         // emit their text unescaped, so retry with the raw value.
         piece = n.nodeValue;
         index = inner.indexOf(piece, cursor);
       }

       if (index < 0) {
         // Not locatable; leave the cursor in place and highlight nothing.
         piece = "";
         index = cursor;
       }

       cursor = index + piece.length;

       if (!isBareText(n)) {
         return;
       }

       var title = `<${element.nodeName}> contains a bare text sibling`;
       var total = totals.get(n.nodeValue);
       var style;

       if (total === 1) {
         style = "background: lightskyblue;";
         title += `, "${n.nodeValue}"\n`;
       }
       else {
         var nth = (seen.get(n.nodeValue) || 0) + 1;

         seen.set(n.nodeValue, nth);

         style = "background: aqua;";
         title += `, "${n.nodeValue}" (${nth} of ${total} occurrances).\n`;
       }

       messages.push({
         start: title + tags[0] + inner.slice(0, index) + "%c" + n.nodeValue,
         style: style,
         rest: inner.slice(index + piece.length) + tags[1]
       });
     });
   }

   return messages;
 }


 /* test it out */


 // Fixture with bare text: the body, main, article, aside and footer elements
 // all have non-whitespace text children sitting beside element children. The
 // footer repeats the same text value several times.
 var bareText = `
 <head><title> * title * </title></head>
 <body>
 first
 <main>
  one <a>main link one </a> two.
  <article>
    three <a>article link </a> four.
  </article>
  <!-- comment -->
  <aside>
    five <a>aside link </a> six.
  </aside>
  seven <a>main link seven two </a> eight.
 </main>
 <footer>
 AA <b>i</b> AA <b>i</b> AA <b>i</b> AA <b>i</b> ZZ <i>i</i> AA <b>i</b> AA
 </footer>
 last <b> b </b> last
 </body>
 `;

 // Fixture without bare text: every piece of visible text is wrapped in its
 // own element, so only whitespace-only text nodes sit between siblings.
 var noBareText = `
 <head><title> * title * </title></head>
 <body>
 <h1>first</h1>
 <main>
  <b>one</b> <a>main link one </a> <b>two.</b>
  <article>
    <b>three</b> <a>article link </a> <b>four.</b>
  </article>
  <!-- comment -->
  <aside>
    <b>five</b> <a>aside link </a> <b>six.</b>
  </aside>
  <b>seven</b> <a>main link seven two </a> <b>eight.</b>
 </main>
 <b> b </b>
 </body>
 `;

 // Each entry pairs a label for the console output with the HTML to inspect.
 var tests = [
  { name:"bareText", text: bareText },
  { name:"noBareText", text: noBareText }
 ];

 // Run every fixture through test(), which replaces the page body each time.
 tests.forEach(item => test(item));
	// 5 September 2023
	// Find uncontrolled or bare text node siblings in a DOM,
	// XPath approach.

	// SEE sketch 27 August 2023, Node iteration approach.
	// https://gist.github.com/dfkaye/6cc8a9cda513dcf97f7a45ba89f4000b

	// 6 September 2023
	// I have to say,
	// 1. Overall, this solution feels better than the node iteration version,
	// 2. XPath selectors take time to get right,
	// 3. the visit() function is far shorter than the node iteration version,
	// 4. the test() function uses its own console styles as well, but... meh,
	// 5. the report() function is a bit longer than the node iteration version due
	// to the branching logic,
	// 6. it's easy to go overboard with Object.assign and friends ;) - I spent way
	// too much time shaving message construction and test output logic yaks.


	/**
	 * Parse an HTML string, swap it into the current document's body, and log
	 * every bare text sibling that report() finds there.
	 *
	 * Relies on the sibling functions visit() and report(), and on the browser
	 * globals DOMParser, document and console.
	 *
	 * @param {{ name: string, text: string }} item - `name` labels the console
	 *   output; `text` is the HTML source to parse and inspect.
	 * @returns {undefined} Output goes to the console only.
	 */
	function test({ name, text }) {
	  // Parse the fixture in a separate document, then move its body content
	  // into the live page, because visit() is given document.body to search.
	  var dom = (new DOMParser()).parseFromString(text.trim(), "text/html");

	  document.body.replaceChildren(...dom.body.childNodes);

	  var messages = report(visit(document.body));

	  if (!messages.length) {
	    console.log(
	      `\u{1F600} %cNo bare text nodes in ${name} test.`,
	      "background: lightgreen; padding: 2px;"
	    );

	    return;
	  }

	  console.log(
	    `\u{1F626} %c${messages.length} bare text nodes in ${name} test.`,
	    "background: pink; padding: 2px;"
	  );

	  messages.forEach((message, i) => {
	    // Each message carries start, style, and (optionally) rest fields, which
	    // become console.log's format string, its %c style, and a trailing
	    // argument. Only the logged copy of the start text gets the 1-based
	    // position prefix; the message object itself is left untouched.
	    var output = Object.keys(message).map((key) => {
	      return key === "start" ? `${i + 1}: ${message[key]}` : message[key];
	    });

	    console.log(...output);
	  });
	}

	/**
	 * Collect every element at or below `node` that has child nodes which are
	 * not all text nodes, i.e. elements whose text may sit beside other nodes.
	 *
	 * @param {Node} node - Context node for the search (an element or a
	 *   document).
	 * @returns {XPathResult} Node iterator in document order; consume it with
	 *   iterateNext().
	 */
	function visit(node) {
	  // A leading "//" restarts the search at the document root and ignores the
	  // context node, so the path is anchored with descendant-or-self instead.
	  var xpath = "descendant-or-self::*" +
	    "[count(./node()) > 0][count(./text()) != count(./node())]";

	  // Evaluate on the document that owns the node; a Document node has no
	  // ownerDocument and is used directly.
	  var owner = node.ownerDocument || node;

	  // The expression uses no namespace prefixes, so no resolver is needed.
	  return owner.evaluate(
	    xpath,
	    node,
	    null,
	    XPathResult.ORDERED_NODE_ITERATOR_TYPE,
	    null
	  );
	}

	/**
	 * Build one console message per bare text node found among the children of
	 * each element in `set`. A bare text node is a text child that contains at
	 * least one non-whitespace character.
	 *
	 * Each message has three fields:
	 *   start - title, opening tag, the content before the text node, then "%c"
	 *           followed by the text node's value,
	 *   style - CSS applied by the "%c" directive,
	 *   rest  - the content after the text node plus the closing tag.
	 *
	 * @param {XPathResult} set - Iterator that yields elements via iterateNext().
	 * @returns {Array<{start: string, style: string, rest: string}>}
	 */
	function report(set) {
	  var messages = [];
	  var element;

	  // True for text nodes that contain something other than whitespace.
	  function isBareText(n) {
	    return n.nodeType === 3 && /\S/.test(n.nodeValue);
	  }

	  // Text as it appears inside innerHTML, where &, <, > and no-break spaces
	  // are written as character references.
	  function escapeText(value) {
	    return value
	      .replace(/&/g, "&amp;")
	      .replace(/\u00A0/g, "&nbsp;")
	      .replace(/</g, "&lt;")
	      .replace(/>/g, "&gt;");
	  }

	  // Markup a child contributes to its parent's innerHTML; "" when unknown.
	  function serialize(child) {
	    if (child.nodeType === 1) {
	      return child.outerHTML;
	    }

	    if (child.nodeType === 3) {
	      return escapeText(child.nodeValue);
	    }

	    if (child.nodeType === 8) {
	      return "<!--" + child.nodeValue + "-->";
	    }

	    return "";
	  }

	  // Opening and closing tags, taken from the positions around the content
	  // rather than by splitting on it, because the content string can also
	  // occur elsewhere in the outer markup (for example in an attribute).
	  function tagsOf(outer, inner) {
	    var closeAt = outer.lastIndexOf("</");
	    var openEnd = closeAt - inner.length;

	    if (openEnd > 0 && outer.slice(openEnd, closeAt) === inner) {
	      return [outer.slice(0, openEnd), outer.slice(closeAt)];
	    }

	    var parts = outer.split(inner);

	    return [parts[0], parts.length > 1 ? parts[parts.length - 1] : ""];
	  }

	  while ((element = set.iterateNext())) {
	    var children = Array.from(element.childNodes);
	    var inner = element.innerHTML;
	    var tags = tagsOf(element.outerHTML, inner);

	    // How many bare text siblings share each value, so repeated values can
	    // be labelled "k of n".
	    var totals = new Map();

	    children.forEach(function (n) {
	      if (isBareText(n)) {
	        totals.set(n.nodeValue, (totals.get(n.nodeValue) || 0) + 1);
	      }
	    });

	    var seen = new Map();

	    // Offset in `inner` where the current child's markup begins. Walking the
	    // children in order ties every text node to its own position, even when
	    // the same characters also occur earlier, later, or inside a sibling.
	    var cursor = 0;

	    children.forEach(function (n) {
	      var piece = serialize(n);
	      var index = inner.indexOf(piece, cursor);

	      if (index < 0 && n.nodeType === 3) {
	        // Some parents (script or style, per the HTML serialization rules)
	        // emit their text unescaped, so retry with the raw value.
	        piece = n.nodeValue;
	        index = inner.indexOf(piece, cursor);
	      }

	      if (index < 0) {
	        // Not locatable; leave the cursor in place and highlight nothing.
	        piece = "";
	        index = cursor;
	      }

	      cursor = index + piece.length;

	      if (!isBareText(n)) {
	        return;
	      }

	      var title = `<${element.nodeName}> contains a bare text sibling`;
	      var total = totals.get(n.nodeValue);
	      var style;

	      if (total === 1) {
	        style = "background: lightskyblue;";
	        title += `, "${n.nodeValue}"\n`;
	      }
	      else {
	        var nth = (seen.get(n.nodeValue) || 0) + 1;

	        seen.set(n.nodeValue, nth);

	        style = "background: aqua;";
	        title += `, "${n.nodeValue}" (${nth} of ${total} occurrances).\n`;
	      }

	      messages.push({
	        start: title + tags[0] + inner.slice(0, index) + "%c" + n.nodeValue,
	        style: style,
	        rest: inner.slice(index + piece.length) + tags[1]
	      });
	    });
	  }

	  return messages;
	}


	/* test it out */


	// Fixture with bare text: the body, main, article, aside and footer elements
	// all have non-whitespace text children sitting beside element children. The
	// footer repeats the same text value several times.
	var bareText = `
	<head><title> * title * </title></head>
	<body>
	first
	<main>
	one <a>main link one </a> two.
	<article>
	three <a>article link </a> four.
	</article>
	<!-- comment -->
	<aside>
	five <a>aside link </a> six.
	</aside>
	seven <a>main link seven two </a> eight.
	</main>
	<footer>
	AA <b>i</b> AA <b>i</b> AA <b>i</b> AA <b>i</b> ZZ <i>i</i> AA <b>i</b> AA
	</footer>
	last <b> b </b> last
	</body>
	`;

	// Fixture without bare text: every piece of visible text is wrapped in its
	// own element, so only whitespace-only text nodes sit between siblings.
	var noBareText = `
	<head><title> * title * </title></head>
	<body>
	<h1>first</h1>
	<main>
	<b>one</b> <a>main link one </a> <b>two.</b>
	<article>
	<b>three</b> <a>article link </a> <b>four.</b>
	</article>
	<!-- comment -->
	<aside>
	<b>five</b> <a>aside link </a> <b>six.</b>
	</aside>
	<b>seven</b> <a>main link seven two </a> <b>eight.</b>
	</main>
	<b> b </b>
	</body>
	`;

	// Each entry pairs a label for the console output with the HTML to inspect.
	var tests = [
	{ name:"bareText", text: bareText },
	{ name:"noBareText", text: noBareText }
	];

	// Run every fixture through test(), which replaces the page body each time.
	tests.forEach(item => test(item));