Created
October 19, 2022 18:13
-
-
Save snewcomer/16ff81d74a8ddcf35e5a9c2598a3f3a5 to your computer and use it in GitHub Desktop.
ember untrusted html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// NOTE: DO NOT DISABLE THIS ON YOUR OWN!
// This lint is disabled here because this module sanitizes untrusted
// content before handing it to htmlSafe. If you disable this lint and
// use htmlSafe elsewhere without sanitizing, you risk opening up an
// XSS vulnerability.
// eslint-disable-next-line name-xss/avoid-xss-risks
import { htmlSafe } from '@ember/template'; | |
// Tags allowed through when the caller does not supply `allowedTags`.
// Frozen so that no code path can widen the default allow-list at runtime.
const DEFAULT_SAFE_TAGS = Object.freeze(['strong', 'em', 'b', 'i', 'u', 'br']);
// Per-tag attribute allow-list used when the caller does not supply
// `allowedAttributes`: empty, so no attributes survive by default.
const DEFAULT_SAFE_ATTRS = Object.freeze({});
/**
 * Sanitizes untrusted HTML by removing all tags and attributes that aren't
 * explicitly allowed, then marks the sanitized result as safe for rendering.
 *
 * Tag names (in `allowedTags`, `extraAllowedTags` and the keys of
 * `allowedAttributes`) are matched case-insensitively: they are lowercased
 * here because parsed tag names are lowercased before being compared.
 *
 * allowedAttributes should be an object with tag name keys and array values
 * containing all of the attributes allowed for that tag:
 *
 *   allowedAttributes: { 'p': ['class'], 'div': ['role', 'aria-hidden'] }
 *
 * The above allows ONLY the class attribute for <p> and ONLY the role and
 * aria-hidden attributes for <div>.
 *
 * @method untrustedHtml
 * @param {String|Number} input HTML from an untrusted source
 * @param {Object} [config]
 * @param {String[]} [config.allowedTags] tags to allow through INSTEAD of
 *   DEFAULT_SAFE_TAGS
 * @param {String[]} [config.extraAllowedTags] tags to allow IN ADDITION to
 *   DEFAULT_SAFE_TAGS (mutually exclusive with allowedTags)
 * @param {Object<String, String[]>} [config.allowedAttributes] allowed
 *   attributes for each tag; defaults to DEFAULT_SAFE_ATTRS
 * @return {*} the htmlSafe-wrapped sanitized markup, or `input` itself,
 *   untouched, when it is neither a string nor a number
 * @throws {Error} when both allowedTags and extraAllowedTags are given
 * @public
 */
export function untrustedHtml(input, { allowedTags, extraAllowedTags, allowedAttributes } = {}) {
  if (typeof input !== 'string' && typeof input !== 'number') {
    return input;
  }

  if (allowedTags && extraAllowedTags) {
    throw new Error('untrustedHtml got both allowedTags and extraAllowedTags');
  }

  // [].concat (rather than spread) keeps accepting a bare string as a
  // single tag name. Lowercasing makes e.g. 'P' match parsed <p> elements.
  const allowedTagsSet = new Set(
    [].concat(extraAllowedTags || [], allowedTags || DEFAULT_SAFE_TAGS).map((tag) => String(tag).toLowerCase())
  );

  // Null-prototype dictionary: lookups never hit Object.prototype members,
  // and an own '__proto__' key in the config is stored as plain data instead
  // of replacing the dictionary's prototype.
  const allowedAttributeSets = Object.create(null);
  for (const [tag, attributes] of Object.entries(allowedAttributes || DEFAULT_SAFE_ATTRS)) {
    const key = tag.toLowerCase();
    // Keys that differ only by case collapse onto one tag; merge their lists.
    const merged = new Set(allowedAttributeSets[key]);
    for (const attribute of new Set(attributes)) {
      merged.add(attribute);
    }
    allowedAttributeSets[key] = merged;
  }

  // FastBoot (server-side rendering) has no DOMParser, so parse with jsdom
  // there and with the browser's own parser everywhere else.
  const sanitize = typeof FastBoot !== 'undefined' ? sanitizeHtmlInNode : sanitizeHtmlInBrowser;

  // NOTE: This is one of the few places where htmlSafe on user input is
  //       safe, because these functions sanitize the HTML. You'll almost
  //       always want to use untrustedHtml instead.
  // eslint-disable-next-line name-xss/avoid-xss-risks
  return htmlSafe(sanitize(input, allowedTagsSet, allowedAttributeSets));
}
/**
 * Server-side (FastBoot) entry point: parses the untrusted markup with jsdom
 * and hands the resulting document to sanitizeDocument.
 *
 * @method sanitizeHtmlInNode
 * @param {String} input The untrusted HTML
 * @param {Set<String>} allowedTags The tags to allow
 * @param {Object<String, Set<String>>} allowedAttributes The attributes to
 *   allow for each tag
 * @return {String} The sanitized HTML
 * @private
 */
function sanitizeHtmlInNode(input, allowedTags, allowedAttributes) {
  // jsdom is looked up through FastBoot.require at call time instead of
  // being imported at module scope.
  const jsdom = FastBoot.require('jsdom');
  const dom = new jsdom.JSDOM(input);

  return sanitizeDocument(dom.window.document, allowedTags, allowedAttributes);
}
// Lazily created DOMParser, kept at module scope so that every sanitization
// reuses the same instance instead of constructing a new one.
let parser = null;

/**
 * Browser entry point: parses the untrusted markup with the shared DOMParser
 * and hands the resulting document to sanitizeDocument.
 *
 * @method sanitizeHtmlInBrowser
 * @param {String} input The untrusted HTML
 * @param {Set<String>} allowedTags The tags to allow
 * @param {Object<String, Set<String>>} allowedAttributes The attributes to
 *   allow for each tag
 * @return {String} The sanitized HTML
 * @private
 */
function sanitizeHtmlInBrowser(input, allowedTags, allowedAttributes) {
  parser = parser || new DOMParser();

  // Interpolation coerces the input to a string before it is parsed.
  const markup = `${input}`;
  const unsafeDocument = parser.parseFromString(markup, 'text/html');

  return sanitizeDocument(unsafeDocument, allowedTags, allowedAttributes);
}
/**
 * Sanitizes every top-level node of the parsed document's <body> and
 * serializes the survivors back into an HTML string.
 *
 * @method sanitizeDocument
 * @param {HTMLDocument} unsafeDocument The parsed untrusted HTML document
 * @param {Set<String>} allowedTags The tags to allow
 * @param {Object<String, Set<String>>} allowedAttributes The attributes to
 *   allow for each tag
 * @return {String} The innerHTML of a container holding the sanitized nodes
 * @private
 */
function sanitizeDocument(unsafeDocument, allowedTags, allowedAttributes) {
  const container = unsafeDocument.createElement('div');

  // Iterate over a snapshot of the child list: sanitizeNode hands text nodes
  // back as-is, and appending them to the container re-parents them.
  Array.from(unsafeDocument.body.childNodes).forEach((child) => {
    const clean = sanitizeNode(child, allowedTags, allowedAttributes);
    if (clean) {
      container.appendChild(clean);
    }
  });

  return container.innerHTML;
}
// Attributes whose value is interpreted as a URL to navigate to or load.
const URL_ATTRIBUTE_NAMES = new Set([
  'action',
  'background',
  'cite',
  'codebase',
  'data',
  'formaction',
  'href',
  'longdesc',
  'manifest',
  'poster',
  'src',
  'xlink:href',
]);

// URL schemes that execute script when followed.
const SCRIPT_URL_PATTERN = /^(?:javascript|vbscript):/i;

/**
 * Tells whether an attribute value is a script-executing URL.
 *
 * @method isScriptUrl
 * @param {String} value The attribute value (already entity-decoded by the
 *   HTML parser)
 * @return {Boolean}
 * @private
 */
function isScriptUrl(value) {
  // URL parsing ignores leading control characters/spaces and embedded
  // tabs/newlines, so "java\tscript:" still runs. Stripping every C0 control
  // and space before matching deliberately over-approximates that.
  // eslint-disable-next-line no-control-regex
  const compact = String(value).replace(/[\u0000-\u0020]/g, '');
  return SCRIPT_URL_PATTERN.test(compact);
}

/**
 * Recursively rebuilds a parsed untrusted node so that only allow-listed tags
 * and attributes survive. Text and CDATA nodes are returned as-is; elements
 * are re-created from scratch; anything else (comments, disallowed tags and
 * their whole subtree) is dropped.
 *
 * Even for an allow-listed attribute, a URL-bearing attribute (href, src,
 * action, ...) whose value is a `javascript:` or `vbscript:` URL is dropped,
 * because the result is marked as safe HTML by the caller.
 *
 * @method sanitizeNode
 * @param {Node} node A parsed untrusted HTML node
 * @param {Set<String>} allowedTags The (lowercase) tags to allow
 * @param {Object<String, Set<String>>} allowedAttributes The attributes to
 *   allow for each (lowercase) tag
 * @return {Node|null} The sanitized node, or null when it must be dropped
 * @private
 */
function sanitizeNode(node, allowedTags, allowedAttributes) {
  // Plain text is safe as is
  // NOTE: The lowercase node (instead of Node) is intentional. Node is only
  //       accessible in browser. In Node.js, it depends on jsdom (which we
  //       avoid importing to exclude from the clientside vendor bundle).
  //       Instead of passing down window.Node or jsdom.Node depending on
  //       context, we rely on the fact that instances of Node (of which node
  //       will be one) will also have these constants set on them.
  const { nodeType } = node;
  if (nodeType === node.TEXT_NODE || nodeType === node.CDATA_SECTION_NODE) {
    return node;
  }

  // Only elements can be allow-listed. Rejecting comments and other node
  // kinds here (instead of via their empty tag name) means they can never
  // reach the element-only code below.
  if (nodeType !== node.ELEMENT_NODE) {
    return null;
  }

  // Refuse anything that isn't one of the allowed tags
  const tagName = (node.tagName || '').toLowerCase();
  if (!allowedTags.has(tagName)) {
    return null;
  }

  // Reconstruct node with only the allowedAttributes and sanitize its children
  const sanitized = node.ownerDocument.createElement(tagName);

  // Own-property lookup only: a tag named like an Object.prototype member
  // (e.g. "constructor") must not resolve to an inherited value.
  const permittedAttributes = Object.prototype.hasOwnProperty.call(allowedAttributes, tagName)
    ? allowedAttributes[tagName]
    : null;

  if (permittedAttributes) {
    for (const { name, nodeValue: value } of [...node.attributes]) {
      if (!permittedAttributes.has(name)) {
        continue;
      }
      if (URL_ATTRIBUTE_NAMES.has(name.toLowerCase()) && isScriptUrl(value)) {
        continue;
      }
      sanitized.setAttribute(name, value);
    }
  }

  // Snapshot the children: appending a returned text node re-parents it.
  for (const child of [...node.childNodes]) {
    const sanitizedChild = sanitizeNode(child, allowedTags, allowedAttributes);
    if (sanitizedChild) {
      sanitized.appendChild(sanitizedChild);
    }
  }

  return sanitized;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment