Created
April 20, 2021 09:53
-
-
Save glen-84/4ad04cab7ed3ec74a2e6ed04e6c12348 to your computer and use it in GitHub Desktop.
Koa middleware for Rendertron.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import axios from "axios"; | |
import type {IncomingMessage} from "node:http"; | |
import type {Middleware} from "koa"; | |
/**
 * Default user agent substrings identifying bots/crawlers that struggle with
 * pages that depend on JavaScript. `makeMiddleware` joins these into a single
 * case-insensitive alternation when no `userAgentPattern` option is given.
 */
// spell-checker:disable
export const botUserAgents: string[] = [
    "Baiduspider", "bingbot", "Embedly", "facebookexternalhit",
    "LinkedInBot", "outbrain", "pinterest", "quora link preview",
    "rogerbot", "showyoubot", "Slackbot", "TelegramBot",
    "Twitterbot", "vkShare", "W3C_Validator", "WhatsApp"
];
// spell-checker:enable
/**
 * A default set of file extensions for static assets that do not need to be
 * proxied. `makeMiddleware` turns this list into the default
 * `excludeUrlPattern` (a case-insensitive match on the end of the request
 * path), so each extension should appear exactly once.
 */
const staticFileExtensions = [
    "ai",
    "avi",
    "css",
    "dat",
    "dmg",
    "doc",
    "exe",
    "flv",
    "gif",
    "ico",
    "iso",
    "jpeg",
    "jpg",
    "js",
    "less",
    "m4a",
    "m4v",
    "mov",
    "mp3",
    "mp4",
    "mpeg",
    "mpg",
    "pdf",
    "png",
    "ppt",
    "psd",
    "rar",
    "rss",
    "svg",
    "swf",
    "tif",
    "torrent",
    "ttf",
    "txt",
    "wav",
    "wmv",
    "woff",
    "xls",
    "xml",
    "zip"
];
/**
 * Configuration accepted by `makeMiddleware`.
 */
export interface Options {
    /**
     * Base URL of the Rendertron proxy service. Required; `makeMiddleware`
     * throws when it is empty. A trailing slash is appended if missing.
     */
    proxyUrl: string;

    /**
     * Pattern tested against the `User-Agent` header; only matching requests
     * are proxied. When omitted, a case-insensitive pattern built from
     * `botUserAgents` is used.
     */
    userAgentPattern?: RegExp;

    /**
     * Pattern tested against the request path; matching requests are never
     * proxied. When omitted, a pattern matching typical static asset file
     * extensions is used.
     */
    excludeUrlPattern?: RegExp;

    /**
     * Ask Rendertron to load and enable the web components polyfills.
     * Defaults to false.
     */
    injectShadyDom?: boolean;

    /**
     * Timeout for proxy requests, in milliseconds. Defaults to 11000.
     */
    timeout?: number;

    /**
     * Hosts that may be taken from the forwarded host header. If the header
     * is present and its value is in this list, that host (rather than the
     * request's own host) is used to build the URL handed to Rendertron.
     * Useful when this middleware runs on a different host that is proxied
     * behind the actual site, and Rendertron should request the main site.
     */
    allowedForwardedHosts?: string[];

    /**
     * Name of the header carrying the forwarded host. Only consulted when
     * `allowedForwardedHosts` is not empty.
     * Defaults to `"X-Forwarded-Host"`.
     */
    forwardedHostHeader?: string;
}
/**
 * Create a new Koa middleware function that proxies requests to a
 * Rendertron bot rendering service.
 *
 * A request is proxied only when it has a `User-Agent` header matching the
 * user agent pattern and its path does not match the exclude pattern. Every
 * other request, and every request whose proxy call fails, is passed on to
 * the next middleware.
 *
 * @param options - Middleware configuration; `proxyUrl` is required.
 * @returns The configured Koa middleware.
 * @throws Error if `options.proxyUrl` is empty.
 */
export function makeMiddleware(options: Options): Middleware {
    if (!options.proxyUrl) {
        throw new Error("Must set options.proxyUrl.");
    }

    let {proxyUrl} = options;

    if (!proxyUrl.endsWith("/")) {
        proxyUrl += "/";
    }

    const userAgentPattern = options.userAgentPattern ?? new RegExp(botUserAgents.join("|"), "iu");
    const excludeUrlPattern =
        options.excludeUrlPattern ?? new RegExp(`\\.(${staticFileExtensions.join("|")})$`, "iu");
    const injectShadyDom = Boolean(options.injectShadyDom);
    // The Rendertron service itself has a hard limit of 10 seconds to render, so
    // let's give a little more time than that by default.
    const timeout = options.timeout ?? 11000; // Milliseconds.
    const allowedForwardedHosts = options.allowedForwardedHosts ?? [];
    // The forwarded host header is only consulted when at least one host is allowed.
    const forwardedHostHeader = allowedForwardedHosts.length
        ? options.forwardedHostHeader ?? "X-Forwarded-Host"
        : null;

    // Caller-supplied patterns may carry the `g` or `y` flag, which makes
    // `RegExp.prototype.test` resume from `lastIndex`. Reset it so that every
    // request is matched from the start of the string.
    const matches = (pattern: RegExp, value: string): boolean => {
        pattern.lastIndex = 0;

        return pattern.test(value);
    };

    const rendertronMiddleware: Middleware = async (ctx, next) => {
        const ua = ctx.headers["user-agent"];

        if (ua === undefined || !matches(userAgentPattern, ua) || matches(excludeUrlPattern, ctx.path)) {
            await next();
            return;
        }

        // `false` when forwarded hosts are disabled, otherwise the header value
        // (an empty string when the header is absent).
        const forwardedHost = forwardedHostHeader !== null && ctx.get(forwardedHostHeader);
        const host =
            forwardedHost !== false && allowedForwardedHosts.includes(forwardedHost)
                ? forwardedHost
                : ctx.get("host");
        const incomingUrl = `${ctx.protocol}://${host + ctx.originalUrl}`;

        // The page URL is fully encoded into the path, so the query string
        // appended below belongs to the Rendertron request itself.
        let renderUrl = proxyUrl + encodeURIComponent(incomingUrl);

        if (injectShadyDom) {
            renderUrl += "?wc-inject-shadydom=true";
        }

        try {
            const response = await axios.get<IncomingMessage>(renderUrl, {
                responseType: "stream",
                timeout
            });

            // Koa defaults the Content-Type of a stream body to
            // application/octet-stream, so forward the type reported by
            // Rendertron (falling back to HTML) before assigning the body.
            const contentType: unknown = response.headers["content-type"];

            if (typeof contentType === "string" && contentType !== "") {
                ctx.set("Content-Type", contentType);
            } else {
                ctx.type = "html";
            }

            ctx.body = response.data;
        } catch (e: unknown) {
            if (axios.isAxiosError(e)) {
                // eslint-disable-next-line no-console -- Okay in this context.
                console.error(
                    `[rendertron middleware] ${e.code ?? "unknown"} error fetching ${renderUrl}`
                );
            } else {
                // eslint-disable-next-line no-console -- Okay in this context.
                console.error(`[rendertron middleware] unexpected error fetching ${renderUrl}`, e);
            }

            // Best effort: fall back to serving the page without pre-rendering.
            await next();
        }
    };

    return rendertronMiddleware;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment