Skip to content

Instantly share code, notes, and snippets.

@snewcomer
Created October 19, 2022 18:13
Show Gist options
  • Save snewcomer/16ff81d74a8ddcf35e5a9c2598a3f3a5 to your computer and use it in GitHub Desktop.
Save snewcomer/16ff81d74a8ddcf35e5a9c2598a3f3a5 to your computer and use it in GitHub Desktop.
ember untrusted html
// NOTE: DO NOT DISABLE THIS ON YOUR OWN!
// This lint is disabled here knowing that the contract for this
// helper is that no untrusted content is passed in. If you disable
// this lint and use htmlSafe elsewhere, you could risk opening up an
// XSS vulnerability.
// eslint-disable-next-line name-xss/avoid-xss-risks
import { htmlSafe } from '@ember/template';
// Tags allowed through when the caller does not pass `allowedTags`.
// Frozen so that no code path can accidentally widen the shared allow-list
// (in an ES module, writing to a frozen array/object throws a TypeError).
const DEFAULT_SAFE_TAGS = Object.freeze(['strong', 'em', 'b', 'i', 'u', 'br']);
// Per-tag attribute allow-list used when the caller does not pass
// `allowedAttributes`: empty, i.e. every attribute is stripped by default.
const DEFAULT_SAFE_ATTRS = Object.freeze({});
/**
 * Sanitizes untrusted HTML, keeping only the tags and attributes that are
 * explicitly allowed, and wraps the sanitized markup with `htmlSafe`.
 *
 * Input that is neither a string nor a number is returned unchanged (and is
 * NOT wrapped with `htmlSafe`).
 *
 * @method untrustedHtml
 * @param {String|Number} input HTML from an untrusted source
 * @param {Object} [config]
 * @param {String[]} [config.allowedTags] tags to allow INSTEAD of
 *   DEFAULT_SAFE_TAGS
 * @param {String[]} [config.extraAllowedTags] tags to allow IN ADDITION to
 *   DEFAULT_SAFE_TAGS
 * @param {Object} [config.allowedAttributes] attributes to allow, keyed by
 *   tag name (falls back to DEFAULT_SAFE_ATTRS)
 *
 * Each value of allowedAttributes lists every attribute allowed on that tag:
 *
 *   allowedAttributes: { 'p': ['class'], 'div': ['role', 'aria-hidden'] }
 *
 * With the above, <p> keeps ONLY its class attribute and <div> keeps ONLY its
 * role and aria-hidden attributes.
 *
 * @throws {Error} if both allowedTags and extraAllowedTags are passed
 * @public
 */
export function untrustedHtml(input, { allowedTags, extraAllowedTags, allowedAttributes } = {}) {
  const isSanitizable = typeof input === 'string' || typeof input === 'number';
  if (!isSanitizable) {
    return input;
  }

  // The two tag options are mutually exclusive: one replaces the defaults,
  // the other extends them.
  if (allowedTags && extraAllowedTags) {
    throw new Error('untrustedHtml got both allowedTags and extraAllowedTags');
  }

  const extraTags = extraAllowedTags || [];
  const baseTags = allowedTags || DEFAULT_SAFE_TAGS;
  const tagSet = new Set([].concat(extraTags, baseTags));

  // Turn every per-tag attribute list into a Set for cheap membership checks.
  const attributeSetsByTag = Object.entries(allowedAttributes || DEFAULT_SAFE_ATTRS).reduce(
    (setsByTag, [tag, attributes]) => {
      setsByTag[tag] = new Set(attributes);
      return setsByTag;
    },
    {}
  );

  // FastBoot (server-side rendering) has no DOMParser, so the parsing
  // strategy depends on where this code is running.
  const sanitize = typeof FastBoot === 'undefined' ? sanitizeHtmlInBrowser : sanitizeHtmlInNode;

  // NOTE: This is one of the few places where htmlSafe on user input is
  //       safe, because the markup has just been sanitized. Elsewhere you'll
  //       almost always want to call untrustedHtml instead of htmlSafe.
  // eslint-disable-next-line name-xss/avoid-xss-risks
  return htmlSafe(sanitize(input, tagSet, attributeSetsByTag));
}
/**
 * Server-side (FastBoot) sanitization: parses the untrusted HTML with jsdom,
 * loaded lazily through `FastBoot.require`, and sanitizes the resulting
 * document.
 *
 * @method sanitizeHtmlInNode
 * @param {String} input The untrusted HTML
 * @param {Set<String>} allowedTags The tags to allow
 * @param {Object<String, Set<String>>} allowedAttributes The attributes to
 *   allow for each tag
 * @return {String} The sanitized HTML
 * @private
 */
function sanitizeHtmlInNode(input, allowedTags, allowedAttributes) {
  const jsdom = FastBoot.require('jsdom');
  const dom = new jsdom.JSDOM(input);
  const parsedDocument = dom.window.document;
  return sanitizeDocument(parsedDocument, allowedTags, allowedAttributes);
}
// Lazily created DOMParser, shared by every browser-side sanitization so a
// new parser is not constructed per call.
let parser = null;
/**
 * Browser-side sanitization: parses the untrusted HTML as a `text/html`
 * document with the shared DOMParser and sanitizes the resulting document.
 *
 * @method sanitizeHtmlInBrowser
 * @param {String|Number} input The untrusted HTML (coerced to a string)
 * @param {Set<String>} allowedTags The tags to allow
 * @param {Object<String, Set<String>>} allowedAttributes The attributes to
 *   allow for each tag
 * @return {String} The sanitized HTML
 * @private
 */
function sanitizeHtmlInBrowser(input, allowedTags, allowedAttributes) {
  parser = parser || new DOMParser();
  const markup = `${input}`;
  const parsedDocument = parser.parseFromString(markup, 'text/html');
  return sanitizeDocument(parsedDocument, allowedTags, allowedAttributes);
}
/**
 * Sanitizes every top-level node of the document's <body> and serializes the
 * survivors as an HTML string.
 *
 * @method sanitizeDocument
 * @param {HTMLDocument} unsafeDocument The parsed untrusted HTML document
 * @param {Set<String>} allowedTags The tags to allow
 * @param {Object<String, Set<String>>} allowedAttributes The attributes to
 *   allow for each tag
 * @return {String} The innerHTML of a container holding the sanitized nodes
 * @private
 */
function sanitizeDocument(unsafeDocument, allowedTags, allowedAttributes) {
  const container = unsafeDocument.createElement('div');

  // Iterate over a snapshot of the child list: appending a node to the
  // container can move it out of <body> while we are still looping.
  [...unsafeDocument.body.childNodes].forEach((untrustedChild) => {
    const cleanChild = sanitizeNode(untrustedChild, allowedTags, allowedAttributes);
    if (cleanChild) {
      container.appendChild(cleanChild);
    }
  });

  return container.innerHTML;
}
// Attributes whose value is interpreted as a URL. Even when a caller allows
// one of these, a script-executing URL must never be copied to the output.
const URL_ATTRIBUTES = new Set([
  'action',
  'background',
  'cite',
  'codebase',
  'data',
  'formaction',
  'href',
  'longdesc',
  'manifest',
  'poster',
  'src',
  'xlink:href',
]);
// URL parsers ignore leading control characters/spaces and embedded
// tabs/newlines, so "java\tscript:" is still a javascript: URL. Stripping
// every such character before the scheme test is a deliberate
// over-approximation: it can only cause extra values to be rejected.
// eslint-disable-next-line no-control-regex
const URL_IGNORED_CHARACTERS = /[\u0000-\u0020]/g;
const SCRIPT_URL_SCHEME = /^(?:javascript|vbscript):/i;
/**
 * @method isScriptUrl
 * @param {String} value An attribute value
 * @return {Boolean} true if the value is a javascript: or vbscript: URL
 * @private
 */
function isScriptUrl(value) {
  return SCRIPT_URL_SCHEME.test(String(value).replace(URL_IGNORED_CHARACTERS, ''));
}
/**
 * Sanitizes one node of the untrusted document.
 *
 * Text and CDATA nodes are returned as they are. An element whose tag is
 * allowed is rebuilt from scratch with only its allowed attributes and its
 * recursively sanitized children. Everything else (disallowed elements
 * together with their whole subtree, comments, ...) is dropped.
 *
 * javascript: and vbscript: URLs are dropped from URL-valued attributes
 * (href, src, action, ...) even when the attribute itself is allowed. Other
 * schemes (e.g. data:) are passed through unchanged.
 *
 * @method sanitizeNode
 * @param {Node} node A parsed untrusted HTML node
 * @param {Set<String>} allowedTags The (lowercase) tags to allow
 * @param {Object<String, Set<String>>} allowedAttributes The attributes to
 *   allow for each (lowercase) tag
 * @return {Node|null} The sanitized node, or null if the node is refused
 * @private
 */
function sanitizeNode(node, allowedTags, allowedAttributes) {
  // Plain text is safe as is
  // NOTE: The lowercase node (instead of Node) is intentional. Node is only
  //       accessible in browser. In Node.js, it depends on jsdom (which we
  //       avoid importing to exclude from the clientside vendor bundle).
  //       Instead of passing down window.Node or jsdom.Node depending on
  //       context, we rely on the fact that instances of Node (of which node
  //       will be one) will also have these constants set on them.
  if ([node.TEXT_NODE, node.CDATA_SECTION_NODE].includes(node.nodeType)) {
    return node;
  }

  // Refuse anything that isn't a tag or one of the allowed tags
  const tagName = (node.tagName || '').toLowerCase();
  if (!allowedTags.has(tagName)) {
    return null;
  }

  // Own-property lookup only: a tag named e.g. "constructor" must not resolve
  // to something inherited from Object.prototype.
  const hasAttributeList = Object.prototype.hasOwnProperty.call(allowedAttributes, tagName);
  const currentlyAllowedAttributes = (hasAttributeList && allowedAttributes[tagName]) || new Set();

  // Reconstruct node with only the allowedAttributes and sanitize its children
  const sanitized = node.ownerDocument.createElement(tagName);
  for (const { name, value } of [...node.attributes]) {
    if (!currentlyAllowedAttributes.has(name)) {
      continue;
    }
    if (URL_ATTRIBUTES.has(name.toLowerCase()) && isScriptUrl(value)) {
      continue;
    }
    sanitized.setAttribute(name, value);
  }

  for (const child of [...node.childNodes]) {
    const sanitizedChild = sanitizeNode(child, allowedTags, allowedAttributes);
    if (sanitizedChild) {
      sanitized.appendChild(sanitizedChild);
    }
  }
  return sanitized;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment