Skip to content

Instantly share code, notes, and snippets.

@nberlette
Last active June 29, 2021 02:10
Show Gist options
  • Save nberlette/651de395333273587cd6b695396b0fc0 to your computer and use it in GitHub Desktop.
Save nberlette/651de395333273587cd6b695396b0fc0 to your computer and use it in GitHub Desktop.
Cheerio + HTMLRewriter for Cloudflare Workers
/**
* Cheerio Wrapper for HTMLRewriter on Cloudflare Workers®
*
* Cheerio is used here only to simulate a stream-oriented parser: the whole
* response body is buffered and parsed before any handler runs, so it is slow!
*/
import cheerio from 'cheerio'
/**
 * Overwrites the value of the wrapped text or comment node.
 *
 * Expects `this` to be an array-like wrapper whose first entry is the node.
 *
 * @param {string} content - New value stored in the node's `nodeValue`.
 * @param {object} [options] - Accepted for call compatibility; not used.
 * @returns {void}
 */
function replace(content, options) {
  const target = this[0]
  target.nodeValue = content
}
/**
 * Reports whether the wrapped element carries the named attribute.
 *
 * Delegates to `this.getAttribute` and treats only `undefined` as "absent",
 * so an attribute with an empty value still counts as present.
 *
 * @param {string} name - Attribute name to look up.
 * @returns {boolean} `true` unless the lookup yields `undefined`.
 */
function hasAttribute(name) {
  const value = this.getAttribute(name)
  return typeof value !== 'undefined'
}
/**
 * Reads an attribute from the wrapped element.
 *
 * @param {string} name - Attribute name to read.
 * @returns {*} Whatever `this.attr(name)` returns for that name.
 */
function getAttribute(name) {
  const value = this.attr(name)
  return value
}
/**
 * Sets an attribute on the wrapped element.
 *
 * @param {string} name - Attribute name to set.
 * @param {string} value - Value forwarded to `this.attr(name, value)`.
 * @returns {object} The wrapper itself (`this`), so calls can be chained the
 *   way the Workers `Element.setAttribute` allows.
 */
function setAttribute(name, value) {
  this.attr(name, value)
  return this
}
/**
 * Minimal stand-in for the Cloudflare Workers `HTMLRewriter`, built on cheerio.
 *
 * The response body is read in full and parsed, then the tree is walked
 * depth-first so that handlers fire in document order.
 */
export default class HTMLRewriter {
  constructor() {
    // Ordered [selector, handler] pairs registered through on().
    this.selectors = []
  }

  /**
   * Registers a handler for elements matching a selector.
   *
   * @param {string} element - Selector tested against each walked node with `.is()`.
   * @param {object} handler - Object with optional `element`, `text` and
   *   `comments` callbacks.
   * @returns {HTMLRewriter} This instance, so registrations can be chained.
   */
  on(element, handler) {
    this.selectors.push([element, handler])
    return this
  }

  /**
   * Runs every registered handler over the body of a response.
   *
   * @param {Response} response - Response whose body is read as text.
   * @returns {Promise<Response>} New response holding the serialized document;
   *   the original response is passed as the init argument.
   */
  async transform(response) {
    const text = await response.text()
    const $ = cheerio.load(text)
    // Simulate stream-based parser
    this.walk($, $.root())
    return new Response($.root().html(), response)
  }

  /**
   * Visits a node and its subtree, dispatching to the matching handlers.
   *
   * `element` callbacks run first, then the children are visited in order:
   * element children recurse, while text and comment children are passed to
   * the `text` / `comments` callbacks of the handlers matched on this node.
   *
   * @param {Function} $ - Cheerio instance used to wrap nodes.
   * @param {object} node - Node (or root selection) to visit.
   * @returns {void}
   */
  walk($, node) {
    const $node = this.wrapElement($, node)

    // Select matching HTMLRewrite handlers
    const matchedHandlers = this.selectors
      .filter(([selector]) => $node.is(selector))
      .map(([, handler]) => handler)

    // Trigger HTMLRewrite handlers on Element
    for (const handler of matchedHandlers) {
      handler.element && handler.element($node)
    }

    // Collect the children once, and only after the element handlers ran so
    // that any changes they made to the children are observed.
    const children = $node.contents().toArray()
    const lastIndex = children.length - 1

    // Walk all children
    for (const [i, child] of children.entries()) {
      if (child.nodeType === 1) {
        this.walk($, child)
        continue
      }

      // Nobody is listening on this element: skip building a wrapper.
      if (matchedHandlers.length === 0) {
        continue
      }

      // True for the final child, or when the next sibling has another node type.
      const lastInNode = i === lastIndex || children[i + 1].nodeType !== child.nodeType
      const $child = this.wrapOther($, child, lastInNode)

      for (const handler of matchedHandlers) {
        if (child.nodeType === 3) { // text node
          handler.text && handler.text($child)
        } else if (child.nodeType === 8) { // comment node
          handler.comments && handler.comments($child)
        }
      }
    }
  }

  /**
   * Wraps a node for `element` callbacks.
   *
   * @param {Function} $ - Cheerio instance used to wrap the node.
   * @param {object} node - Node to wrap.
   * @returns {object} Cheerio wrapper extended with `tagName` (taken from
   *   `node.name`) and bound `hasAttribute`, `getAttribute`, `setAttribute`.
   */
  wrapElement($, node) {
    const $node = $(node)
    $node.tagName = node.name
    $node.hasAttribute = hasAttribute.bind($node)
    $node.getAttribute = getAttribute.bind($node)
    $node.setAttribute = setAttribute.bind($node)
    return $node
  }

  /**
   * Wraps a text or comment node for `text` / `comments` callbacks.
   *
   * @param {Function} $ - Cheerio instance used to wrap the node.
   * @param {object} node - Text or comment node to wrap.
   * @param {boolean} lastInNode - Stored as `lastInTextNode` on text nodes only.
   * @returns {object} Cheerio wrapper whose `text` member is overwritten with
   *   the node's current `nodeValue`, plus a bound `replace` function.
   */
  wrapOther($, node, lastInNode) {
    const $node = $(node)
    $node.text = node.nodeValue
    $node.replace = replace.bind($node)
    if (node.nodeType === 3) {
      $node.lastInTextNode = lastInNode
    }
    return $node
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment