Skip to content

Instantly share code, notes, and snippets.

@doriansmiley
Created January 24, 2021 23:26
Show Gist options
  • Select an option

  • Save doriansmiley/7b95f7d672649e1884268fbf36c5a209 to your computer and use it in GitHub Desktop.

Select an option

Save doriansmiley/7b95f7d672649e1884268fbf36c5a209 to your computer and use it in GitHub Desktop.
SSR with Puppeteer and Shadow DOM
// Static file server with an on-demand prerender endpoint.
//
//   GET /ssr?url=<absolute http(s) URL>&selector=<CSS selector>
//
// renders `url` in headless Chrome (see ./ssr/ssr) and responds with the serialized
// markup. Everything else is served as static files from this directory.
//
// Usage: node <this file> [port]   (port defaults to 3000)

// eslint-disable-next-line @typescript-eslint/no-var-requires
const express = require('express');
// eslint-disable-next-line @typescript-eslint/no-var-requires
const path = require('path');
const app = express();
// eslint-disable-next-line @typescript-eslint/no-var-requires
const cors = require('cors');
const port = process.argv[2] || 3000;
// eslint-disable-next-line @typescript-eslint/no-var-requires
const ssr = require('./ssr/ssr');

// Only pages served by this very server may call it cross-origin.
const corsOptions = {
  origin: [`http://localhost:${port}`],
  credentials: true,
};
app.use(cors(corsOptions));

/**
 * Returns true when `value` is a string holding an absolute http(s) URL.
 *
 * Anything else (missing value, repeated query parameter, `file:`, `data:`, ...) is
 * rejected so the headless browser is never pointed at local files.
 *
 * @param {*} value Raw query-string value.
 * @returns {boolean}
 */
function isHttpUrl(value) {
  if (typeof value !== 'string') {
    return false;
  }
  try {
    const { protocol } = new URL(value);
    return protocol === 'http:' || protocol === 'https:';
  } catch (e) {
    return false;
  }
}

app.get('/ssr', async (req, res, next) => {
  const { url, selector } = req.query;
  console.log(url);
  console.log(selector);

  // NOTE(review): this endpoint still fetches arbitrary http(s) hosts on behalf of
  // the caller (SSRF). Consider an allow-list before exposing it beyond localhost.
  if (!isHttpUrl(url)) {
    return res.status(400).send('Query parameter "url" must be an absolute http(s) URL.');
  }

  try {
    // On failure ssr() resolves with { html: <error text>, status: 500 } and no timing.
    const { html, ttRenderMs, status = 200 } = await ssr(url, undefined, selector);
    if (ttRenderMs !== undefined) {
      // Add Server-Timing! See https://w3c.github.io/server-timing/.
      res.set('Server-Timing', `Prerender;dur=${ttRenderMs};desc="Headless render time (ms)"`);
    }
    if (status !== 200) {
      // Error text can echo the requested URL; never let a browser parse it as HTML.
      res.type('text/plain');
    }
    return res.status(status).send(html); // Serve prerendered page (or the error) as response.
  } catch (err) {
    // Express 4 does not observe rejected promises from async handlers, so forward
    // unexpected failures (e.g. the browser could not be launched) explicitly.
    return next(err);
  }
});

app.use('/', express.static(path.join(__dirname, '.')));
app.listen(port, () => console.log(`Static server listening on port ${port}!`));
const puppeteer = require('puppeteer');
// Request filtering, after
// https://hackernoon.com/tips-and-tricks-for-web-scraping-with-puppeteer-ed391a63d952
// Only the HTML is needed, so most other resources are never downloaded.
// Skipping them is also a huge performance/response-time boost.

/** Resource types (as reported by `request.resourceType()`) that are always aborted. */
const blockedResourceTypes = [
  'image', 'media', 'font', 'texttrack',
  'object', 'beacon', 'csp_report', 'imageset',
];

/** A request is aborted when its URL contains any of these substrings. */
const skippedResources = [
  'quantserve', 'adzerk', 'doubleclick', 'adition', 'exelator',
  'sharethrough', 'cdn.api.twitter', 'google-analytics', 'googletagmanager', 'google',
  'fontawesome', 'facebook', 'analytics', 'optimizely', 'clicktale',
  'mixpanel', 'zedo', 'clicksor', 'tiqcdn',
];

/** In-memory cache of rendered markup, keyed by the requested URL. */
const RENDER_CACHE = new Map();
/**
 * Loads `url` in the given page and returns the serialized document, including
 * shadow roots. Private helper of `ssr`; throws on any navigation/evaluation failure.
 *
 * @param {object} page Puppeteer page to render in.
 * @param {string} url URL to navigate to.
 * @param {string} [selector] See `ssr`.
 * @returns {Promise<string>} Inner HTML of the `<html>` element.
 */
async function renderPage(page, url, selector) {
  await page.setRequestInterception(true);
  page.on('request', (request) => {
    // Match against the URL without its query string and fragment.
    const requestUrl = request.url().split('?')[0].split('#')[0];
    const isBlocked =
      blockedResourceTypes.includes(request.resourceType()) ||
      skippedResources.some((resource) => requestUrl.includes(resource));
    if (isBlocked) {
      request.abort();
    } else {
      request.continue();
    }
  });

  await page.goto(url, {
    timeout: 25000,
    waitUntil: 'networkidle0',
  });

  // Inject <base> so relative resources load properly. This has to happen after
  // navigation: anything added to the initial blank document is discarded by goto().
  await page.evaluate((baseUrl) => {
    const base = document.createElement('base');
    base.href = baseUrl;
    // Add to top of head, before all other resources.
    document.head.prepend(base);
  }, url);

  if (selector) {
    // Wait until the target element has attached its shadow root. Skipped without a
    // selector, where querySelector(undefined) could never match and would only time out.
    await page.waitForFunction((sel) => !!document.querySelector(sel)?.shadowRoot, {
      polling: 'mutation',
    }, selector);
  }

  // Remove scripts and html imports. They've already executed.
  await page.evaluate(() => {
    const elements = document.querySelectorAll('script, link[rel="import"]');
    elements.forEach((e) => e.remove());
  });

  return page.$eval('html', (element) => element.getInnerHTML({ includeShadowRoots: true }));
}

/**
 * Releases a browser once a render is done. Private helper of `ssr`.
 * A shared (connected) browser is only disconnected from; one launched for this
 * render is closed. Failures are logged rather than thrown so that cleanup never
 * replaces the render result.
 *
 * @param {object} browser Puppeteer browser.
 * @param {boolean} isShared True when the browser was reached via `browserWSEndpoint`.
 */
async function releaseBrowser(browser, isShared) {
  try {
    if (isShared) {
      await browser.disconnect();
    } else {
      await browser.close();
    }
  } catch (e) {
    console.warn({ message: `Failed to release browser: ${e}` });
  }
}

/**
 * Prerenders a page in headless Chrome and serializes it, shadow roots included.
 * Successful renders are cached in RENDER_CACHE, keyed by `url`.
 *
 * https://developers.google.com/web/tools/puppeteer/articles/ssr#reuseinstance
 * @param {string} url URL to prerender.
 * @param {string} [browserWSEndpoint] Optional remote debugging URL. If
 * provided, Puppeteer connects to that browser instance and disconnects again
 * afterwards. Otherwise, a new browser instance is launched for this render and
 * closed when it finishes.
 * @param {string} [selector] CSS selector of an element whose shadow root must
 * exist before the page is serialized. When omitted, no extra wait is performed.
 * @returns {Promise<{html: string, ttRenderMs?: number, status?: number}>}
 * `{html, ttRenderMs}` on success (`ttRenderMs` is 0 on a cache hit). If rendering
 * fails, `{html, status: 500}` where `html` is the error text.
 */
async function ssr(url, browserWSEndpoint, selector) {
  if (RENDER_CACHE.has(url)) {
    return { html: RENDER_CACHE.get(url), ttRenderMs: 0 };
  }

  const start = Date.now();
  const isShared = Boolean(browserWSEndpoint);
  let browser;
  if (isShared) {
    // The remote browser's launch flags cannot be changed from here; it needs the
    // same experimental flag as below for shadow roots to serialize.
    browser = await puppeteer.connect({ browserWSEndpoint });
  } else {
    const args = puppeteer.defaultArgs();
    // IMPORTANT: you can't render shadow DOM without this flag
    // getInnerHTML will be undefined without it
    args.push('--enable-experimental-web-platform-features');
    browser = await puppeteer.launch({ args });
  }

  try {
    const page = await browser.newPage();
    try {
      const html = await renderPage(page, url, selector);
      const ttRenderMs = Date.now() - start;
      console.info(`Headless rendered page in: ${ttRenderMs}ms`);
      RENDER_CACHE.set(url, html); // cache rendered page.
      return { html, ttRenderMs };
    } catch (e) {
      const html = e.toString();
      console.warn({ message: `URL: ${url} Failed with message: ${html}` });
      return { html, status: 500 };
    } finally {
      // Close the page on success and on failure, so a shared browser does not
      // accumulate tabs.
      await page.close().catch((e) => console.warn({ message: `Failed to close page: ${e}` }));
    }
  } finally {
    // Without this every uncached request leaves a Chromium process running.
    await releaseBrowser(browser, isShared);
  }
}
module.exports = ssr;
<template shadowroot="open">
  <!-- Declarative shadow root for a list item: a thumbnail image and its caption. -->
  <li data-component-root="root">
    <img data-skin-part="thumbnail" id="image"/>
    <label data-skin-part="caption" for="image"></label>
  </li>
</template>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment