Last active
August 3, 2020 12:39
-
-
Save martinschierle/17384e78c3449b01174443d294a43371 to your computer and use it in GitHub Desktop.
Small Puppeteer script that runs over a domain, crawls random URLs, captures screenshots with the LCP and CLS elements highlighted, and writes out a heatmap of the LCP and CLS bounding boxes across all crawled pages.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer'); | |
const { createCanvas, loadImage } = require('canvas') | |
const mustache = require('mustache') | |
var fs = require('fs'); | |
const fsExtra = require('fs-extra') | |
// Default upper bound on the number of pages crawled when no limit is given
// on the command line.
const MAX_URLS = 50;

// Mustache template for the HTML report; read once, relative to the current
// working directory.
const TEMPLATE = fs.readFileSync('template.html', 'utf8');

// Using the Googlebot user agent might get rid of some cookie alerts
// (W.X.Y.Z stands for the Chrome version):
//const agent = "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";

// Network throttling profile. Only referenced from a commented-out
// `Network.emulateNetworkConditions` call in doBatch.
// NOTE(review): throughput values look like bits/s divided by 8 (bytes/s) and
// latency like milliseconds - confirm against the CDP documentation.
const Good3G = {
  offline: false,
  downloadThroughput: (1.5 * 1024 * 1024) / 8,
  uploadThroughput: (750 * 1024) / 8,
  latency: 40,
};

// Device descriptor used for page emulation and for sizing the heatmap canvases.
const phone = puppeteer.devices['Nexus 5X'];
/**
 * Shuffles an array in place (Fisher-Yates), walking from the last index
 * down to 1 and swapping each slot with a randomly chosen slot at or below it.
 *
 * @param {Array} a - Array to shuffle; it is mutated.
 * @returns {Array} The same array instance that was passed in.
 */
function shuffle(a) {
  let last = a.length - 1;
  while (last > 0) {
    // Pick a partner index in [0, last].
    const pick = Math.floor(Math.random() * (last + 1));
    [a[last], a[pick]] = [a[pick], a[last]];
    last -= 1;
  }
  return a;
}
/**
 * In-page instrumentation. doBatch passes this function to `page.evaluate`,
 * so it runs in the browser page and must be fully self-contained (it cannot
 * reference anything from the surrounding Node.js module).
 *
 * It publishes its findings on `window`:
 *   - `window.lcp`       {value, size, tagName, classes, path} of the largest
 *                        LCP candidate seen (value/size stay -1 if none).
 *   - `window.cls`       {value} sum of the observed layout-shift scores.
 *   - `window.lcp_elem`  element currently highlighted as LCP, or null.
 *   - `window.cls_elems` elements highlighted as layout-shift sources.
 *
 * Highlighting is done by adding the CSS classes `lcp_elem` / `cls_elem`,
 * which the caller also uses as selectors.
 */
function injectJs() {
  // Inject some CSS to outline the offending elements in the screenshot.
  const style = document.createElement('style');
  style.type = 'text/css';
  style.textContent = '.cls_elem {border: 5px solid green; box-sizing: border-box;} .lcp_elem{border: 5px solid red; box-sizing: border-box;}';
  // Fall back to the root element for documents without a <head>.
  (document.head || document.documentElement).appendChild(style);

  window.lcp = {value: -1, size: -1};
  window.cls = {value: 0};
  window.lcp_elem = null;
  window.cls_elems = [];

  // Maps a reported node to something that can carry a CSS class: the node
  // itself when it is an HTMLElement, otherwise its parent element. Returns
  // null when there is nothing to highlight (null node, detached text node).
  const toHighlightable = function (node) {
    let el = node;
    if (el && !(el instanceof HTMLElement)) el = el.parentElement;
    return (el && el.classList) ? el : null;
  };

  // Builds a "tag#id.class > tag#id.class" description of the ANCESTORS of
  // `el`, from <html> down to its direct parent; `el` itself is not included
  // (its tag name and classes are reported separately).
  const cssPath = function (el) {
    const path = [];
    let node = el;
    while (node && node.nodeName.toLowerCase() !== 'html') {
      node = node.parentNode;
      if (!node) break;
      // Read the class attribute instead of `className`: on SVG elements
      // `className` is not a string.
      const classAttr = (typeof node.getAttribute === 'function') ? node.getAttribute('class') : null;
      const classes = classAttr ? classAttr.trim() : '';
      path.unshift(node.nodeName.toLowerCase() +
        (node.id ? '#' + node.id : '') +
        (classes ? '.' + classes.split(/\s+/).join('.') : ''));
    }
    return path.join(" > ");
  };

  // Track the largest `largest-contentful-paint` candidate reported so far.
  const po = new PerformanceObserver((entryList) => {
    entryList.getEntries().forEach(function (entry) {
      console.log(entry);
      if (!(entry.size > window.lcp.size)) return;

      window.lcp.size = entry.size;
      window.lcp.value = entry.startTime;

      // Drop the old highlight BEFORE adding the new one, so that an element
      // reported twice (with a growing size) stays highlighted.
      if (window.lcp_elem) window.lcp_elem.classList.remove("lcp_elem");

      // `entry.element` can be null; the timing is still recorded in that
      // case, but there is nothing to highlight or describe.
      const e = toHighlightable(entry.element);
      window.lcp_elem = e;
      if (e) {
        e.classList.add("lcp_elem");
        window.lcp.tagName = e.tagName;
        window.lcp.classes = e.getAttribute("class");
        window.lcp.path = cssPath(e);
      } else {
        window.lcp.tagName = null;
        window.lcp.classes = null;
        window.lcp.path = null;
      }
    });
  });
  // Observe entries of type `largest-contentful-paint`, including buffered entries,
  // i.e. entries that occurred before calling `observe()` below.
  po.observe({
    type: 'largest-contentful-paint',
    buffered: true,
  });

  try {
    const cls_po = new PerformanceObserver((list) => {
      for (const entry of list.getEntries()) {
        console.log(entry);
        // Shifts that follow recent user input do not count towards CLS.
        if (entry.hadRecentInput) continue;
        window.cls.value += entry.value;
        for (const source of entry.sources || []) {
          const e = toHighlightable(source.node);
          if (!e) continue;
          e.classList.add("cls_elem");
          if (!window.cls_elems.includes(e)) window.cls_elems.push(e);
        }
      }
    });
    cls_po.observe({type: 'layout-shift', buffered: true});
  } catch (e) {
    // Best effort: log and carry on if the browser doesn't support this API.
    console.log(e.message);
  }
}
/**
 * Crawls up to `max_urls` pages starting at `domain`, measuring LCP and CLS
 * on each page with an emulated phone, and writes a report into `output/`:
 *   - output/images/*.jpeg   one screenshot per page, offending elements outlined
 *   - output/lcp_heatmap.png accumulated LCP element bounding boxes
 *   - output/cls_heatmap.png accumulated layout-shift source bounding boxes
 *   - output/index.html      the Mustache-rendered result table
 *
 * The `output` directory is emptied first. Pages are visited in random order;
 * new candidates are the links on each visited page whose href contains the
 * start URL's hostname. A failure on a single page is logged and the crawl
 * continues with the next URL.
 *
 * @param {string} domain - Absolute http(s) URL to start crawling from.
 * @param {number} max_urls - Maximum number of pages to visit; also used to
 *   scale the opacity of each heatmap rectangle.
 * @returns {Promise<void>} Resolves once the report has been written.
 * @throws {Error} If `domain` is not a parseable http(s) URL.
 */
async function doBatch(domain, max_urls) {
  // Plain timer-based delay, independent of the Puppeteer version in use.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Resolves with the promise's value, or with undefined once `ms` elapsed.
  // The timer is cleared afterwards so it does not linger.
  const withTimeout = (promise, ms) => {
    let timer;
    const timeout = new Promise((resolve) => {
      timer = setTimeout(resolve, ms);
    });
    return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
  };

  // Returns the href without its #fragment, or null for anything that is not
  // a navigable http(s) URL (mailto:, javascript:, malformed values, ...).
  const normalizeUrl = (href) => {
    try {
      const parsed = new URL(href);
      if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return null;
      parsed.hash = '';
      return parsed.href;
    } catch (e) {
      return null;
    }
  };

  // Validate the start URL before touching the output directory.
  const startUrl = normalizeUrl(domain);
  if (!startUrl) throw new Error("Not a crawlable http(s) URL: " + domain);
  const hostname = new URL(startUrl).hostname;

  // reset output file and images dir
  fsExtra.emptyDirSync("output");
  fs.mkdirSync("output/images");

  // Queue of URLs still to visit; newly discovered links are appended.
  let urls = [startUrl];
  // Every URL ever queued, so that no page is queued (and visited) twice.
  const knownURLs = new Set(urls);
  let processedCount = 0;

  // json results object to write towards mustache at the end
  const results = {domain: domain, items: []};

  // initialize two canvases for drawing output heatmaps
  const cls_canvas = createCanvas(phone.viewport.width, phone.viewport.height);
  const cls_ctx = cls_canvas.getContext('2d');
  const lcp_canvas = createCanvas(phone.viewport.width, phone.viewport.height);
  const lcp_ctx = lcp_canvas.getContext('2d');

  const browser = await puppeteer.launch({
    args: ['--no-sandbox'],
    //headless: false,
    //executablePath: '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
    timeout: 10000
  });

  try {
    const page = await browser.newPage();
    //await page.evaluateOnNewDocument(injectJs);
    //phone.userAgent = agent;
    await page.emulate(phone);
    const client = await page.target().createCDPSession();
    await client.send('Network.enable');
    await client.send('ServiceWorker.enable');
    //await client.send('Network.emulateNetworkConditions', Good3G);
    //await client.send('Emulation.setCPUThrottlingRate', { rate: 4 });

    while (processedCount < max_urls && urls.length > 0) {
      urls = shuffle(urls);
      const url = urls.pop();
      processedCount++;
      console.log("Processing: " + url);
      try {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000});
        //page.on('console', consoleObj => console.log(consoleObj.text()));
        await sleep(2000); // let's give it a bit more time, to be sure everything's loaded

        // Collect same-site links in a single round trip and queue the new ones.
        const hrefs = await page.$$eval(
          "a[href*='" + hostname + "']",
          (anchors) => anchors.map((a) => a.href)
        );
        for (const href of hrefs) {
          const normalized = normalizeUrl(href);
          if (!normalized || knownURLs.has(normalized)) continue;
          knownURLs.add(normalized);
          urls.push(normalized);
        }

        // The injected observers use `buffered: true`, so entries recorded
        // before this point are still delivered to them.
        console.log("Injecting JS...");
        await withTimeout(page.evaluate(injectJs), 5000);
        // Give the observers time to receive their entries.
        await sleep(2000);

        console.log("Gathering data...");
        const url_results = await withTimeout(
          page.evaluate(function() {return {'cls': window.cls, 'lcp': window.lcp}}),
          5000
        );
        if (!url_results || !url_results.cls || !url_results.lcp) {
          console.log("Couldn't retrieve results.");
          continue;
        }
        const cls = url_results.cls;
        const lcp = url_results.lcp;
        const lcp_elem = await page.$(".lcp_elem");
        const cls_elems = await page.$$(".cls_elem");

        console.log("Getting screenshot...");
        const screenshot_path = "output/images/" + url.replace(/[^a-zA-Z0-9]/gi, "") + ".jpeg";
        try {
          await page.screenshot({path: screenshot_path, type: "jpeg", "quality": 30});
        } catch(e) {console.log("Can't take screenshot: " + e.message)}

        // adding to lcp heatmap
        if (lcp_elem) {
          // boundingBox() yields null for elements that are not visible.
          const box = await lcp_elem.boundingBox();
          if (box) {
            const alpha = 3.0 / max_urls;
            lcp_ctx.fillStyle = 'rgba(255, 0, 0, ' + alpha + ')';
            lcp_ctx.fillRect(box.x, box.y, box.width, box.height);
          }
        }

        // adding to cls heatmap
        for (const cls_elem of cls_elems) {
          const box = await cls_elem.boundingBox();
          if (!box) continue;
          const alpha = 1.0 / max_urls;
          cls_ctx.fillStyle = 'rgba(0, 255, 0, ' + alpha + ')';
          cls_ctx.fillRect(box.x, box.y, box.width, box.height);
        }

        results.items.push({url: url, lcp: lcp.value, cls: cls.value, lcpTagName: lcp.tagName, lcpClasses: lcp.classes, lcpPath: lcp.path, screenshot: screenshot_path});
      } catch (error) {
        // A single failing page must not abort the whole crawl.
        console.log(error);
      }
    }
  } finally {
    // Always release the browser process; a failure to close must not
    // prevent the report from being written.
    try {
      await browser.close();
    } catch (e) {
      console.log("Can't close browser: " + e.message);
    }
  }

  // write out lcp heatmap
  fs.writeFileSync("output/lcp_heatmap.png", lcp_canvas.toBuffer());
  results.lcpHeatmap = "lcp_heatmap.png";

  // write out cls heatmap
  fs.writeFileSync("output/cls_heatmap.png", cls_canvas.toBuffer());
  results.clsHeatmap = "cls_heatmap.png";

  // write out result html
  const rendered = mustache.render(TEMPLATE, results);
  fs.writeFileSync('output/index.html', rendered);
}
// Command-line entry point: <script> <domain> [maxUrls]
//   domain  - host name or absolute URL; "https://" is prepended when no
//             http(s) scheme is present.
//   maxUrls - optional positive integer; defaults to MAX_URLS.
const myArgs = process.argv.slice(2);
if (myArgs.length === 0) {
  console.log("Please give a domain to crawl as argument!");
}
else {
  let domain = myArgs[0];
  // Test for an actual scheme: a bare host such as "httpbin.org" also starts
  // with "http" but still needs the prefix.
  if (!/^https?:\/\//i.test(domain)) domain = "https://" + domain;
  console.log("Crawling domain: " + domain);

  let max = MAX_URLS;
  if (myArgs.length >= 2) {
    const parsed = Number.parseInt(myArgs[1], 10);
    if (Number.isInteger(parsed) && parsed > 0) {
      max = parsed;
    } else {
      // A NaN limit would make the crawl loop exit immediately, so fall back.
      console.log("Ignoring invalid URL limit '" + myArgs[1] + "', using " + MAX_URLS + ".");
    }
  }

  doBatch(domain, max)
    .then(() => {
      console.log("Done!");
      process.exit(0);
    })
    .catch((err) => {
      // Report the failure and signal it through the exit code.
      console.error(err);
      process.exit(1);
    });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<!--
  Mustache template for the crawl report. Expected view:
    domain      - crawled start URL
    lcpHeatmap  - path of the LCP heatmap image, relative to this file
    clsHeatmap  - path of the CLS heatmap image, relative to this file
    items[]     - one entry per page: url, lcp, cls, lcpTagName, lcpClasses,
                  lcpPath, screenshot
-->
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>Analysis results for domain {{domain}}</title>
  <style>
    #mainTable tr:nth-child(odd){
      background-color: lightblue;
    }
    thead {
      font-weight: bold;
      background-color: lightgrey;
    }
    .screenshot {
      margin:20px;
      width:150px;
    }
    .heatmap {
      width:400px;
    }
    td {
      word-break:break-all;
      width: 16%;
    }
  </style>
</head>
<body>
  <h1>Analysis results for domain {{domain}}</h1>
  <h2>Heatmaps</h2>
  <table>
    <thead>
      <tr>
        <td>LCP Heatmap</td>
        <td>CLS Heatmap</td>
      </tr>
    </thead>
    <tbody>
      <tr>
        <td><img src="{{ lcpHeatmap }}" class="heatmap" alt="LCP heatmap"></td>
        <td><img src="{{ clsHeatmap }}" class="heatmap" alt="CLS heatmap"></td>
      </tr>
    </tbody>
  </table>
  <h2>Detailed Results</h2>
  <table>
    <thead>
      <tr>
        <td width="10%">URL</td>
        <td>LCP</td>
        <td>CLS</td>
        <td>LCP Tag Name</td>
        <td>LCP Classes</td>
        <td>LCP DOM Path</td>
        <td>Screenshot</td>
      </tr>
    </thead>
    <tbody id="mainTable">
      {{#items}}
      <tr>
        <td>{{url}}</td>
        <td>{{lcp}}</td>
        <td>{{cls}}</td>
        <td>{{lcpTagName}}</td>
        <td>{{lcpClasses}}</td>
        <td>{{lcpPath}}</td>
        <!-- The screenshot path is relative to the crawler's working directory, hence the "../". -->
        <td><img src="../{{screenshot}}" class="screenshot" alt="Screenshot of {{url}}"></td>
      </tr>
      {{/items}}
    </tbody>
  </table>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment