Last active
June 29, 2021 02:10
-
-
Save nberlette/651de395333273587cd6b695396b0fc0 to your computer and use it in GitHub Desktop.
Cheerio + HTMLRewriter for Cloudflare Workers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Cheerio wrapper that emulates HTMLRewriter on Cloudflare Workers®.
 *
 * Note: cheerio only simulates a stream-oriented parser here. The whole
 * document is buffered and parsed up front, so this is slow!
 */
import cheerio from 'cheerio' | |
/**
 * Overwrites the value of the wrapped text/comment node.
 *
 * Must be invoked with `this` bound to a wrapper whose index `0` holds the
 * underlying node.
 *
 * @param {string} content - New value assigned to the node's `nodeValue`.
 * @param {object} [options] - Accepted for signature compatibility; not used.
 * @returns {void}
 */
function replace(content, options) {
  const target = this[0]
  target.nodeValue = content
}
/**
 * Reports whether the wrapped element carries the given attribute.
 *
 * Must be invoked with `this` bound to an object exposing `getAttribute`;
 * an attribute counts as present when that lookup yields anything other
 * than `undefined`.
 *
 * @param {string} name - Attribute name to look up.
 * @returns {boolean} `true` when the lookup result is not `undefined`.
 */
function hasAttribute(name) {
  const current = this.getAttribute(name)
  return current !== undefined
}
/**
 * Reads an attribute from the wrapped element.
 *
 * Must be invoked with `this` bound to an object exposing a one-argument
 * `attr(name)` getter; the getter's result is passed through untouched.
 *
 * @param {string} name - Attribute name to read.
 * @returns {*} Whatever `this.attr(name)` returns.
 */
function getAttribute(name) {
  const value = this.attr(name)
  return value
}
/**
 * Writes an attribute on the wrapped element.
 *
 * Must be invoked with `this` bound to an object exposing a two-argument
 * `attr(name, value)` setter. The setter's return value is discarded.
 *
 * @param {string} name - Attribute name to write.
 * @param {*} value - Value handed to `this.attr`.
 * @returns {void}
 */
function setAttribute(name, value) {
  const element = this
  element.attr(name, value)
}
/**
 * Minimal HTMLRewriter look-alike backed by cheerio.
 *
 * Handlers are registered per selector with `on()`. `transform()` buffers the
 * response body, parses it with cheerio, walks the resulting tree in document
 * order while firing the matching handlers, and returns a new Response built
 * from the serialized tree.
 */
export default class HTMLRewriter {
  constructor() {
    // Registered [selector, handler] pairs, kept in registration order.
    this.selectors = []
  }

  /**
   * Registers a handler object for every element matching a selector.
   *
   * @param {string} element - Selector tested against each element via `.is()`.
   * @param {object} handler - Object with optional `element`, `text` and
   *   `comments` callbacks.
   * @returns {HTMLRewriter} This instance, so calls can be chained.
   */
  on(element, handler) {
    this.selectors.push([element, handler])
    return this
  }

  /**
   * Applies every registered handler to the response body.
   *
   * Handlers may be synchronous or return a promise. Promises are awaited one
   * at a time, in document order, before the next handler runs, so changes made
   * by `async` handlers are part of the serialized output and a rejected
   * handler rejects the returned promise.
   *
   * @param {Response} response - Source response; also passed as the init
   *   object of the new Response.
   * @returns {Promise<Response>} Response carrying the rewritten HTML.
   */
  async transform(response) {
    const text = await response.text()
    const $ = cheerio.load(text)
    // Simulate stream-based parser
    for (const outcome of this.visit($, $.root())) {
      if (outcome && typeof outcome.then === 'function') {
        await outcome
      }
    }
    return new Response($.root().html(), response)
  }

  /**
   * Synchronously walks `node` and its descendants, firing matching handlers.
   *
   * Handler return values are ignored here; use `transform()` when handlers
   * are asynchronous.
   *
   * @param {Function} $ - Cheerio instance the node belongs to.
   * @param {*} node - Node (or cheerio selection) to start from.
   * @returns {void}
   */
  walk($, node) {
    const steps = this.visit($, node)
    let step = steps.next()
    while (!step.done) {
      step = steps.next()
    }
  }

  /**
   * Internal traversal shared by `walk()` and `transform()`.
   *
   * Fires the handlers for `node`, then for each of its children in order, and
   * yields the value returned by every handler call so the caller decides
   * whether to await it.
   *
   * @param {Function} $ - Cheerio instance the node belongs to.
   * @param {*} node - Node (or cheerio selection) to visit.
   * @returns {Generator<*, void, void>} Handler return values, in call order.
   */
  * visit($, node) {
    const $node = this.wrapElement($, node)

    // Select matching HTMLRewrite handlers
    const matchedHandlers = this.selectors
      .filter(([selector]) => $node.is(selector))
      .map(([, handler]) => handler)

    // Trigger HTMLRewrite handlers on Element
    for (const handler of matchedHandlers) {
      if (handler.element) {
        yield handler.element($node)
      }
    }

    // Walk all children. The list is snapshotted after the element handlers
    // ran, so their modifications to the children are taken into account.
    const children = $node.contents().toArray()
    const lastIndex = children.length - 1
    for (const [i, child] of children.entries()) {
      if (child.nodeType === 1) {
        yield* this.visit($, child)
        continue
      }

      // Only text (3) and comment (8) nodes have a handler hook.
      let hook = null
      if (child.nodeType === 3) {
        hook = 'text'
      } else if (child.nodeType === 8) {
        hook = 'comments'
      }
      if (hook === null) {
        continue
      }

      // A run of same-typed nodes ends at the last child or when the next
      // sibling has a different node type.
      const lastInNode = i === lastIndex || children[i + 1].nodeType !== child.nodeType
      const $child = this.wrapOther($, child, lastInNode)
      for (const handler of matchedHandlers) {
        if (handler[hook]) {
          yield handler[hook]($child)
        }
      }
    }
  }

  /**
   * Wraps a node in a cheerio selection extended with `tagName`,
   * `hasAttribute`, `getAttribute` and `setAttribute`.
   *
   * @param {Function} $ - Cheerio instance the node belongs to.
   * @param {*} node - Node to wrap; its `name` becomes `tagName`.
   * @returns {object} The extended cheerio selection.
   */
  wrapElement($, node) {
    const $node = $(node)
    $node.tagName = node.name
    $node.hasAttribute = hasAttribute.bind($node)
    $node.getAttribute = getAttribute.bind($node)
    $node.setAttribute = setAttribute.bind($node)
    return $node
  }

  /**
   * Wraps a non-element node in a cheerio selection extended with `text`
   * (the node's current value) and `replace`. Text nodes (type 3) also get
   * `lastInTextNode`.
   *
   * @param {Function} $ - Cheerio instance the node belongs to.
   * @param {*} node - Text or comment node to wrap.
   * @param {boolean} lastInNode - Whether the node ends a run of same-typed siblings.
   * @returns {object} The extended cheerio selection.
   */
  wrapOther($, node, lastInNode) {
    const $node = $(node)
    // Deliberately shadows cheerio's own `.text()` method with the raw value.
    $node.text = node.nodeValue
    $node.replace = replace.bind($node)
    if (node.nodeType === 3) {
      $node.lastInTextNode = lastInNode
    }
    return $node
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment