Created
January 24, 2021 23:26
-
-
Save doriansmiley/7b95f7d672649e1884268fbf36c5a209 to your computer and use it in GitHub Desktop.
SSR with Puppeteer and Shadow DOM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Static file server with an on-demand prerender endpoint.
//
// Usage: node <this file> [port]
//   GET /ssr?url=<absolute http(s) URL>&selector=<CSS selector>
//   GET /*  -> static files from this directory

// eslint-disable-next-line @typescript-eslint/no-var-requires
const express = require('express');
// eslint-disable-next-line @typescript-eslint/no-var-requires
const path = require('path');
const app = express();
// eslint-disable-next-line @typescript-eslint/no-var-requires
const cors = require('cors');
// Port is taken from the first CLI argument and falls back to 3000.
const port = process.argv[2] || 3000;
// eslint-disable-next-line @typescript-eslint/no-var-requires
const ssr = require('./ssr/ssr');

// Cross-origin callers are limited to pages served from this same local port.
const corsOptions = {
    origin: [
        `http://localhost:${port}`
    ],
    credentials: true
};
app.use(cors(corsOptions));

/**
 * Tells whether a query-string value is a single absolute http(s) URL.
 * Repeated parameters (arrays) and missing values are rejected.
 *
 * @param {*} value Raw value taken from `req.query`.
 * @returns {boolean} True when the value parses as an http: or https: URL.
 */
function isHttpUrl (value) {
    if (typeof value !== 'string') {
        return false;
    }
    try {
        const {protocol} = new URL(value);
        return protocol === 'http:' || protocol === 'https:';
    } catch (e) {
        return false;
    }
}

/**
 * GET /ssr — prerenders `url` in headless Chrome and responds with the
 * serialized HTML. `selector` is forwarded to the renderer untouched.
 *
 * NOTE(review): this endpoint makes the server fetch a caller-supplied URL.
 * Only the scheme is checked here; restrict the allowed hosts before exposing
 * it beyond local development.
 */
app.get('/ssr', async (req, res, next) => {
    const {url, selector} = req.query;
    console.log(url);
    console.log(selector);
    if (!isHttpUrl(url)) {
        return res.status(400)
            .type('text/plain')
            .send('Query parameter "url" must be an absolute http(s) URL.');
    }
    try {
        // On failure the renderer resolves with {html: <error text>, status: 500}
        // and no ttRenderMs, so default the status and guard the timing header.
        const {html, ttRenderMs, status = 200} = await ssr(url, undefined, selector);
        if (typeof ttRenderMs === 'number') {
            // Add Server-Timing! See https://w3c.github.io/server-timing/.
            res.set('Server-Timing', `Prerender;dur=${ttRenderMs};desc="Headless render time (ms)"`);
        }
        if (status >= 400) {
            // The body is an error message, not markup: never let it be parsed as HTML.
            res.type('text/plain');
        }
        return res.status(status).send(html); // Serve prerendered page as response.
    } catch (err) {
        // A rejected promise inside an async handler must be forwarded explicitly,
        // otherwise the request is left hanging.
        return next(err);
    }
});
app.use('/', express.static(path.join(__dirname,'.')));
app.listen(port, () => console.log(`Static server listening on port ${port}!`));
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const puppeteer = require('puppeteer'); | |
// Request filtering follows
// https://hackernoon.com/tips-and-tricks-for-web-scraping-with-puppeteer-ed391a63d952
// Only the HTML is needed for prerendering, so every other download is
// skipped; this is also a large response-time win.

// Resource types whose requests are aborted outright.
const blockedResourceTypes = [
    'image', 'media', 'font', 'texttrack',
    'object', 'beacon', 'csp_report', 'imageset',
];

// A request is aborted when its URL (query string and fragment stripped)
// contains any of these substrings.
const skippedResources = [
    'quantserve', 'adzerk', 'doubleclick', 'adition',
    'exelator', 'sharethrough', 'cdn.api.twitter',
    'google-analytics', 'googletagmanager', 'google',
    'fontawesome', 'facebook', 'analytics', 'optimizely',
    'clicktale', 'mixpanel', 'zedo', 'clicksor', 'tiqcdn',
];

// In-memory cache of rendered pages: url -> serialized HTML.
const RENDER_CACHE = new Map();
/**
 * Prerenders a page in headless Chrome and returns its HTML with shadow
 * roots serialized inline. Successful renders are cached per URL.
 *
 * Adapted from
 * https://developers.google.com/web/tools/puppeteer/articles/ssr#reuseinstance
 *
 * @param {string} url URL to prerender.
 * @param {string} [browserWSEndpoint] Optional remote debugging URL. If
 *     provided, Puppeteer connects to that browser instance and only
 *     disconnects from it afterwards. Otherwise a new browser instance is
 *     launched for this call and closed when the call finishes.
 *     NOTE(review): a remote browser presumably has to be started with
 *     --enable-experimental-web-platform-features itself — confirm.
 * @param {string} [selector] CSS selector of an element that must have a
 *     shadow root before the page is serialized. When omitted, the page is
 *     serialized as soon as navigation settles.
 * @returns {Promise<{html: string, ttRenderMs: number}|{html: string, status: number}>}
 *     The rendered HTML and render time in ms (0 on a cache hit), or, on any
 *     failure, the error text as `html` together with `status: 500`.
 */
async function ssr (url, browserWSEndpoint, selector) {
    if (RENDER_CACHE.has(url)) {
        return {html: RENDER_CACHE.get(url), ttRenderMs: 0};
    }
    const start = Date.now();
    let browser;
    let page;
    try {
        if (browserWSEndpoint) {
            browser = await puppeteer.connect({browserWSEndpoint});
        } else {
            const args = puppeteer.defaultArgs();
            // IMPORTANT: you can't render shadow DOM without this flag
            // getInnerHTML will be undefined without it
            args.push('--enable-experimental-web-platform-features');
            browser = await puppeteer.launch({
                args
            });
        }
        page = await browser.newPage();
        await page.setRequestInterception(true);
        page.on('request', request => {
            // Match against the bare URL so query strings and fragments
            // cannot produce false positives.
            const requestUrl = request.url().split('?')[0].split('#')[0];
            const isBlocked =
                blockedResourceTypes.includes(request.resourceType()) ||
                skippedResources.some(resource => requestUrl.includes(resource));
            if (isBlocked) {
                request.abort();
            } else {
                request.continue();
            }
        });
        await page.goto(url, {
            timeout: 25000,
            waitUntil: 'networkidle0'
        });
        if (selector) {
            await page.waitForFunction(selector => !!document.querySelector(selector)?.shadowRoot, {
                polling: 'mutation',
            }, selector);
        }
        // Inject <base> so relative resources load properly in the served copy.
        // This has to happen after navigation: anything added to the blank
        // start page is thrown away by page.goto().
        await page.evaluate(baseUrl => {
            const base = document.createElement('base');
            base.href = baseUrl;
            // Add to top of head, before all other resources.
            document.head.prepend(base);
        }, url);
        // Remove scripts and html imports. They've already executed.
        await page.evaluate(() => {
            const elements = document.querySelectorAll('script, link[rel="import"]');
            elements.forEach(e => e.remove());
        });
        const html = await page.$eval('html', (element) => {
            return element.getInnerHTML({includeShadowRoots: true});
        });
        const ttRenderMs = Date.now() - start;
        console.info(`Headless rendered page in: ${ttRenderMs}ms`);
        RENDER_CACHE.set(url, html); // cache rendered page.
        return {html, ttRenderMs};
    }
    catch (e) {
        const html = e.toString();
        console.warn({ message: `URL: ${url} Failed with message: ${html}` });
        return { html, status: 500 };
    }
    finally {
        await releaseBrowser(browser, page, Boolean(browserWSEndpoint));
    }
};

/**
 * Releases the Chrome resources acquired by one ssr() call. Cleanup failures
 * are logged rather than thrown so they cannot replace the render result.
 *
 * @param {object} [browser] Puppeteer browser, if one was obtained.
 * @param {object} [page] Puppeteer page, if one was opened.
 * @param {boolean} isShared True when the browser was connected to rather
 *     than launched here; a shared browser is left running.
 */
async function releaseBrowser (browser, page, isShared) {
    try {
        if (isShared) {
            // Close the page we opened here (not the browser).
            if (page) {
                await page.close();
            }
            if (browser) {
                await browser.disconnect();
            }
        } else if (browser) {
            // We launched this browser; closing it also closes its pages.
            await browser.close();
        }
    } catch (e) {
        console.warn({ message: `Failed to release browser: ${e}` });
    }
}
| module.exports = ssr; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- Declarative shadow root holding one list item: a thumbnail image and its caption label. -->
<template shadowroot="open">
    <li data-component-root="root">
        <img data-skin-part="thumbnail" id="image"/>
        <label data-skin-part="caption" for="image"></label>
    </li>
</template>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment