Last active
November 22, 2025 06:55
-
-
Save dfkaye/94bc03241c4c3bf458ab0dc5d56b1958 to your computer and use it in GitHub Desktop.
Using XML, XSLT, and XHR to parse and serialize HTML in the browser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // 19 June 2023 | |
| // Terri passed away | |
| // I got this to work | |
| // XSLT in the browser | |
| // 12 July 2023 | |
| // replaced DOMParser and "trusted" policy with async xhr parser | |
| // see https://gist.github.com/dfkaye/a83f89d7496bb669570a1de207b5b8d4 | |
| // 13-14 July 2023 | |
| // shrunk the xhr parse function | |
| // 22 November 2023 | |
| // more tiny cleanup on the XHR part, xsl, dom, and fragment console statements. | |
| // 29 November 2023 | |
| // type can be specified as xml or html without "text/" or "application/". | |
| // missing type returns an XML document with no contentType or body, but | |
| // as an otherwise HTML document. | |
| // 2 March 2024 | |
| // Added an alternative example using XSLT's document() function at | |
| // https://gist.github.com/dfkaye/882796e70b12940a616fcb7434ec9ddd | |
| // 17 May 2024 | |
| // shrunk the parse function further | |
| // 21 November 2025 | |
| // Reverted 29 November 2023 decision. Now the type param defaults | |
| // to "xml" and the Blob's type defaults to "text/${type}" so that a | |
| // missing type parameter results in a populated XML document response. | |
/**
 * Parse a string of markup into a document by loading it through an
 * asynchronous XMLHttpRequest against a temporary Blob URL.
 *
 * @param {object} [options]
 * @param {string} [options.text=""] - Markup source to parse.
 * @param {string} [options.type="xml"] - MIME subtype written without the
 *   "text/" prefix (e.g. "xml" or "html"). A missing or empty value falls
 *   back to "xml".
 * @returns {Promise<Document|null>} Resolves with whatever `xhr.response`
 *   holds once the request has loaded.
 * @throws {Error} Rejects if the request for the Blob URL fails.
 */
async function parse({ text = "", type = "xml" } = {}) {
  // `||` rather than `??` on purpose: an explicit empty string must also
  // fall back to "xml", not produce the invalid type "text/".
  const subtype = type || "xml";
  const blob = new Blob([text], { type: `text/${subtype}` });
  const url = URL.createObjectURL(blob);

  try {
    return await new Promise(function (resolve, reject) {
      const xhr = new XMLHttpRequest();

      xhr.open("GET", url);
      xhr.responseType = "document";

      // Attach handlers before sending so no event can be missed.
      xhr.onload = function () {
        resolve(xhr.response);
      };
      // Without this, a failed request would leave the promise pending forever.
      xhr.onerror = function () {
        reject(new Error(`parse: failed to load text/${subtype} document`));
      };

      xhr.send();
    });
  } finally {
    // Release the object URL once the request has settled; otherwise every
    // call keeps its Blob alive for the lifetime of the page.
    URL.revokeObjectURL(url);
  }
}
/* test it out */

// Stylesheet source, one entry per line; joined with newlines below.
var xslsrc = [
  '<?xml version="1.0" encoding="UTF-8"?>',
  '<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">',
  '<xsl:output method="html" indent="yes" />',
  '<xsl:template match="/">',
  '<h>update</h>',
  '<xsl:apply-templates />',
  '</xsl:template>',
  '<xsl:template match="//b">',
  '<updated><xsl:value-of select="." /></updated>',
  '<e>end</e>',
  '</xsl:template>',
  '</xsl:stylesheet>',
].join("\n");

// Parse the stylesheet as XML, then log the elapsed time, the resulting
// content type, and the serialized root element.
console.group("parse XSL");
var xsl;
{
  const startedAt = Date.now();
  xsl = await parse({ type: "xml", text: xslsrc });
  console.log(`${Date.now() - startedAt}ms`);
}
console.log(xsl.contentType);
console.warn(xsl.documentElement.outerHTML);
console.groupEnd();
// Load the parsed stylesheet into an XSLTProcessor and log how long the
// import took, followed by the processor itself.
console.group("import XSLT");
var xslt = new XSLTProcessor();
{
  const startedAt = Date.now();
  xslt.importStylesheet(xsl);
  console.log(`${Date.now() - startedAt}ms`);
}
console.log(xslt);
console.groupEnd();
// Parse an HTML snippet into a document that serves as the transform input,
// then log the elapsed time, the content type, the matched element's text,
// and the serialized root element.
console.group("parse HTML");
var domsrc = `
<b data-value="xml-node">original value</b>
`.trim();
var dom;
{
  const startedAt = Date.now();
  dom = await parse({ text: domsrc, type: "html" });
  console.log(`${Date.now() - startedAt}ms`);
}
console.log(dom.contentType);
// `nodeValue` is always null for Element nodes; `textContent` yields the
// element's text ("original value"), which is what this line means to show.
console.log(dom.querySelector("[data-value=\"xml-node\"]").textContent);
console.warn(dom.documentElement.outerHTML);
console.groupEnd();
// Run the imported stylesheet against the parsed HTML body, producing a
// fragment owned by the current document, then log each resulting node.
console.group("transform");
var fragment;
{
  const startedAt = Date.now();
  fragment = xslt.transformToFragment(dom.body, document);
  console.log(`${Date.now() - startedAt}ms`);
}
console.log(fragment);
for (const child of fragment.childNodes) {
  // Elements have outerHTML; other nodes (e.g. text) fall back to nodeValue.
  console.warn(child.outerHTML || child.nodeValue);
}
console.groupEnd();
// Serialize the transformed fragment back to a string and log the result
// along with the time the serialization took.
console.group("serialize");
var xsr = new XMLSerializer();
var str;
{
  const startedAt = Date.now();
  str = xsr.serializeToString(fragment);
  console.log(`${Date.now() - startedAt}ms`);
}
console.log(str);
console.groupEnd();
// Edge cases for parse(): each entry is [console group label, parse options].
// The cases run one after another, each inside its own console group.
for (const [label, options] of [
  // missing type returns XML document with `contentType: "text/xml"`,
  // `body: null`, but with otherwise HTML DOM fields, e.g., firstElementChild,
  // childNodes, on<event> methods, etc.
  ["missing type", { text: '<a href="/">_</a>' }],

  // response should be XML document
  ["empty type", { text: "<b>b</b>", type: "" }],

  // response should be null
  ["missing text", { type: "html" }],
]) {
  console.group(label);
  console.log(await parse(options));
  console.groupEnd();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment