dfkaye · July 22, 2023 19:58
diff --git a/async-parse-html.js b/async-parse-html.js
 // gist 700!

 // 7 July 2023
 // Using XHR to parse local HTML strings into DOM documents
 // to get around "trusted HTML" and other DOMParser/innerHTML hogwash.

 // 22 July 2023
 // Added <img src="evil" onerror="alert('pwnd')">
 // If src is blocked by CSP img-src or default-src, Firefox does not execute onerror,
 // but Chrome still executes it.
 // If src is 404'd, both Chrome and Firefox execute the alert.
 // To prevent onerror from executing, define a CSP whose script-src
 // directive does not allow `unsafe-inline`.

 // Our HTML fragment text

 // Sample fragment used throughout the demo. It intentionally mixes an inline
 // onclick handler, an unknown element containing stray "&" and "<" characters,
 // a <meta>, a <script> and an <img> carrying an inline onerror handler.
 var source = `
 <section id="x-fragment" onclick="alert('evil')">
  <fake>&amp;& < &lt; title</fake>
  <meta charset="UTF-8">
  <script>alert(1);</script>
  <img src="evil" onerror="alert('pwnd')">
 </section>
 `;

 // Baseline attempt with DOMParser. In Trusted HTML CSP restricted apps
 // (Chrome's blank tab, for example) parseFromString throws; the error is
 // only logged so the rest of the demo keeps running.
 try {
  const parser = new DOMParser();
  parser.parseFromString(source, "text/html");
 } catch (parseError) {
  // Message logged in that case:
  //   This document requires 'TrustedHTML' assignment.
  //   -------
  //   TypeError: Failed to execute 'parseFromString' on 'DOMParser': This document requires 'TrustedHTML' assignment.
  console.error(parseError);
 }

 // However, we can use the native XMLHttpRequest to request, parse a
 // blob of HTML into a "document" response and resolve a promise
 // with that response.

 // The parseHTML function accepts an HTML string and returns the promise
 // object so it can be awaited.

 /**
  * Parses an HTML string into a Document without DOMParser or innerHTML:
  * the text is wrapped in a "text/html" Blob, exposed through an object URL
  * and loaded with XMLHttpRequest using responseType "document".
  *
  * Parsing does NOT sanitize the markup. Inline event handlers and scripts
  * are still present on the returned nodes, so inserting them into a live
  * page can execute them unless a Content Security Policy forbids it.
  *
  * @param {string} source - HTML text to parse.
  * @returns {Promise<Document>} Resolves with the parsed document
  *   (`xhr.response`).
  * @throws {Error} The promise rejects when the request errors, is aborted,
  *   or completes without producing a document.
  */
 async function parseHTML(source) {
  const blob = new Blob([source], { type: "text/html" });
  const url = URL.createObjectURL(blob);

  try {
    return await new Promise(function (resolve, reject) {
      const xhr = new XMLHttpRequest();

      xhr.responseType = "document";
      xhr.onload = function () {
        // Resolve directly instead of inside requestAnimationFrame: rAF
        // callbacks are paused in background tabs, which would leave the
        // promise pending indefinitely.
        if (xhr.response === null) {
          reject(new Error("parseHTML: the request completed without a document."));
          return;
        }

        // The API optimization step to go here is to resolve on
        // `xhr.response.querySelector("body")` instead.
        resolve(xhr.response);
      };
      // Without these handlers a failed or aborted request would leave the
      // promise unsettled forever.
      xhr.onerror = function () {
        reject(new Error("parseHTML: the request for the HTML blob failed."));
      };
      xhr.onabort = function () {
        reject(new Error("parseHTML: the request for the HTML blob was aborted."));
      };
      xhr.open("GET", url);
      xhr.send();
    });
  } finally {
    // Object URLs keep their Blob alive until revoked; release it whether
    // the request succeeded or not.
    URL.revokeObjectURL(url);
  }
 }

 // The fetch API has no "document" response type; it can only return the markup as text:
 // fetch(url)
 //     .then(function(response) { return response.text(); })
 //     .then(function(text) { console.log(text); });

 /* test it out */

 // Parse the sample fragment, then dump the result at several levels of
 // detail: the Document itself, its full serialization, the <body> element
 // (outer and inner HTML) and finally the fragment's own markup.
 var dom = await parseHTML(source);

 console.log(dom);
 console.log(dom.documentElement.outerHTML);
 {
  const bodyElement = dom.querySelector("body");
  console.log(bodyElement.outerHTML);
  console.log(bodyElement.innerHTML);
 }
 console.log(dom.querySelector("#x-fragment").outerHTML);
 /*
 <section id="x-fragment" onclick="alert('evil')">
  <fake>&amp;&amp; &lt; &lt; title</fake>
  <meta charset="UTF-8">
  <script>alert(1);</script>
  <img src="evil" onerror="alert('pwnd')">
 </section>
 */

 // Now you can add these live DOM elements into your app...

 // Create a container in the current page's document (not attached to the
 // page yet) and make the parsed fragment its only child.
 var div = Object.assign(document.createElement("div"), { id: "test-container" });
 div.replaceChildren(dom.querySelector("#x-fragment"));

 // Show the container's serialized markup.
 console.log(div.outerHTML);

 // Note that all tags are preserved while certain characters are escaped.

 /*
 <div id="test-container"><section id="x-fragment" onclick="alert('evil')">
  <fake>&amp;&amp; &lt; &lt; title</fake>
  <meta charset="UTF-8">
  <script>alert(1);</script>
  <img src="evil" onerror="alert('pwnd')">
 </section></div>
 */


 // 22 July 2023 - XSS attacks.

 // Inserting the image into the detached element above is already enough to
 // trigger the src request and, when that request fails, the inline onerror handler.

 // Once the element is attached to the DOM, the inline click handler
 // in the section element is executable.

 // Attach the container to the live page, then click the fragment so that
 // its inline onclick handler gets a chance to run.
 document.body.appendChild(div);
 document.body.querySelector("#x-fragment").click();

 // To prevent the src request, you should define a Content Security Policy
 // with a default-src or img-src with a limited set of domains.

 // To prevent the inline handler attacks, you should define a Content Security Policy
 // with a script-src that excludes `unsafe-inline`.
	// gist 700!

	// 7 July 2023
	// Using XHR to parse local HTML strings into DOM documents
	// to get around "trusted HTML" and other DOMParser/innerHTML hogwash.

	// 22 July 2023
	// Added <img src="evil" onerror="alert('pwnd')">
	// If src is blocked by CSP img-src or default-src, Firefox does not execute onerror,
	// but Chrome still executes it.
	// If src is 404'd, both Chrome and Firefox execute the alert.
	// To prevent onerror from executing, define a CSP whose script-src
	// directive does not allow `unsafe-inline`.

	// Our HTML fragment text

	// Sample fragment used throughout the demo. It intentionally mixes an inline
	// onclick handler, an unknown element containing stray "&" and "<" characters,
	// a <meta>, a <script> and an <img> carrying an inline onerror handler.
	var source = `
	<section id="x-fragment" onclick="alert('evil')">
	<fake>&& < < title</fake>
	<meta charset="UTF-8">
	<script>alert(1);</script>
	<img src="evil" onerror="alert('pwnd')">
	</section>
	`;

	// Baseline attempt with DOMParser. In Trusted HTML CSP restricted apps
	// (Chrome's blank tab, for example) parseFromString throws; the error is
	// only logged so the rest of the demo keeps running.
	try {
	  const parser = new DOMParser();
	  parser.parseFromString(source, "text/html");
	} catch (parseError) {
	  // Message logged in that case:
	  //   This document requires 'TrustedHTML' assignment.
	  //   -------
	  //   TypeError: Failed to execute 'parseFromString' on 'DOMParser': This document requires 'TrustedHTML' assignment.
	  console.error(parseError);
	}

	// However, we can use the native XMLHttpRequest to request, parse a
	// blob of HTML into a "document" response and resolve a promise
	// with that response.

	// The parseHTML function accepts an HTML string and returns the promise
	// object so it can be awaited.

	/**
	 * Parses an HTML string into a Document without DOMParser or innerHTML:
	 * the text is wrapped in a "text/html" Blob, exposed through an object URL
	 * and loaded with XMLHttpRequest using responseType "document".
	 *
	 * Parsing does NOT sanitize the markup. Inline event handlers and scripts
	 * are still present on the returned nodes, so inserting them into a live
	 * page can execute them unless a Content Security Policy forbids it.
	 *
	 * @param {string} source - HTML text to parse.
	 * @returns {Promise<Document>} Resolves with the parsed document
	 *   (`xhr.response`).
	 * @throws {Error} The promise rejects when the request errors, is aborted,
	 *   or completes without producing a document.
	 */
	async function parseHTML(source) {
	  const blob = new Blob([source], { type: "text/html" });
	  const url = URL.createObjectURL(blob);

	  try {
	    return await new Promise(function (resolve, reject) {
	      const xhr = new XMLHttpRequest();

	      xhr.responseType = "document";
	      xhr.onload = function () {
	        // Resolve directly instead of inside requestAnimationFrame: rAF
	        // callbacks are paused in background tabs, which would leave the
	        // promise pending indefinitely.
	        if (xhr.response === null) {
	          reject(new Error("parseHTML: the request completed without a document."));
	          return;
	        }

	        // The API optimization step to go here is to resolve on
	        // `xhr.response.querySelector("body")` instead.
	        resolve(xhr.response);
	      };
	      // Without these handlers a failed or aborted request would leave the
	      // promise unsettled forever.
	      xhr.onerror = function () {
	        reject(new Error("parseHTML: the request for the HTML blob failed."));
	      };
	      xhr.onabort = function () {
	        reject(new Error("parseHTML: the request for the HTML blob was aborted."));
	      };
	      xhr.open("GET", url);
	      xhr.send();
	    });
	  } finally {
	    // Object URLs keep their Blob alive until revoked; release it whether
	    // the request succeeded or not.
	    URL.revokeObjectURL(url);
	  }
	}

	// The fetch API has no "document" response type; it can only return the markup as text:
	// fetch(url)
	// .then(function(response) { return response.text(); })
	// .then(function(text) { console.log(text); });

	/* test it out */

	// Parse the sample fragment, then dump the result at several levels of
	// detail: the Document itself, its full serialization, the <body> element
	// (outer and inner HTML) and finally the fragment's own markup.
	var dom = await parseHTML(source);

	console.log(dom);
	console.log(dom.documentElement.outerHTML);
	{
	  const bodyElement = dom.querySelector("body");
	  console.log(bodyElement.outerHTML);
	  console.log(bodyElement.innerHTML);
	}
	console.log(dom.querySelector("#x-fragment").outerHTML);
	/*
	<section id="x-fragment" onclick="alert('evil')">
	<fake>&amp;&amp; &lt; &lt; title</fake>
	<meta charset="UTF-8">
	<script>alert(1);</script>
	<img src="evil" onerror="alert('pwnd')">
	</section>
	*/

	// Now you can add these live DOM elements into your app...

	// Create a container in the current page's document (not attached to the
	// page yet) and make the parsed fragment its only child.
	var div = Object.assign(document.createElement("div"), { id: "test-container" });
	div.replaceChildren(dom.querySelector("#x-fragment"));

	// Show the container's serialized markup.
	console.log(div.outerHTML);

	// Note that all tags are preserved while certain characters are escaped.

	/*
	<div id="test-container"><section id="x-fragment" onclick="alert('evil')">
	<fake>&amp;&amp; &lt; &lt; title</fake>
	<meta charset="UTF-8">
	<script>alert(1);</script>
	<img src="evil" onerror="alert('pwnd')">
	</section></div>
	*/


	// 22 July 2023 - XSS attacks.

	// Inserting the image into the detached element above is already enough to
	// trigger the src request and, when that request fails, the inline onerror handler.

	// Once the element is attached to the DOM, the inline click handler
	// in the section element is executable.

	// Attach the container to the live page, then click the fragment so that
	// its inline onclick handler gets a chance to run.
	document.body.appendChild(div);
	document.body.querySelector("#x-fragment").click();

	// To prevent the src request, you should define a Content Security Policy
	// with a default-src or img-src with a limited set of domains.

	// To prevent the inline handler attacks, you should define a Content Security Policy
	// with a script-src that excludes `unsafe-inline`.