Created
August 20, 2020 07:48
-
-
Save martinschierle/a549219714a4b72384b36f72f0aff805 to your computer and use it in GitHub Desktop.
Crawler to crawl for layout shift sources
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Third-party and core dependencies of the crawler.
const puppeteer = require('puppeteer');
const { createCanvas, loadImage } = require('canvas');
const mustache = require('mustache');
const fs = require('fs');
const fsExtra = require('fs-extra');

// Intended cap on the number of URLs per run.
// NOTE(review): nothing in the visible code reads this; the entry point passes its own limit to doBatch.
const MAX_URLS = 50;

// Mustache template for the HTML report, read once at start-up relative to the working directory.
const TEMPLATE = fs.readFileSync('template.html', 'utf8');

// using googlebot user agent might get rid of some cookie alerts
// (W.X.Y.Z is a placeholder for the Chrome version and must be filled in before use)
//const agent = "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
// Network throttling profile handed to the DevTools `Network.emulateNetworkConditions` command.
// Per the DevTools protocol, throughput values are bytes per second (hence the division by 8
// to convert from bits) and latency is in milliseconds.
const Good3G = {
  offline: false,
  // 1.5 Mbit/s down
  downloadThroughput: (1.5 * 1024 * 1024) / 8,
  // 750 kbit/s up
  uploadThroughput: (750 * 1024) / 8,
  latency: 40,
};
// Device descriptor from Puppeteer's built-in device list; passed to page.emulate() in doBatch.
const phone = puppeteer.devices['Nexus 5X'];
/**
 * In-page probe that attributes layout shifts to recently finished resource loads.
 *
 * doBatch hands this function to page.evaluate(), so it executes inside the crawled
 * page. It must therefore stay fully self-contained: every helper and constant it
 * needs is declared inside its own body, and it only touches browser globals
 * (window, document, performance, PerformanceObserver, Node).
 *
 * Side effects: resets window.shifts to an empty array and registers a buffered
 * 'layout-shift' PerformanceObserver. For every shift source whose rectangle changed
 * size, one record is appended per matching heuristic:
 *   {url, cause: "FONT" | "IMAGE" | "ADS", ressource, impact, timegap}
 * where `impact` is the entry's value and `timegap` is the time in ms between the
 * resource's responseEnd and the shift.
 */
function injectJs() {
  window.shifts = [];

  // Look-back windows (ms before the shift) in which a finished resource is considered a suspect.
  const FONT_WINDOW_MS = 150;
  const IMAGE_WINDOW_MS = 150;
  const AD_WINDOW_MS = 400;

  const FONT_PATTERN = /(woff)|(ttf)/;
  const AD_PATTERN = /\/ads\?/;
  // The alternation is grouped so that the leading dot applies to every extension.
  // Ungrouped, any URL that merely contained "png", "gif", ... was treated as an image.
  const IMAGE_PATTERN = /\.(jpg|png|gif|svg|jpeg|webp)/;

  const TEXT_TAGS = ['P', 'I', 'A'];
  const IMAGE_TAGS = ['IMG', 'PICTURE', 'DIV'];
  const AD_TAGS = ['DIV', 'SPAN', 'IFRAME', 'SECTION'];

  // True when the source's rectangle changed width or height (not merely its position).
  function didSizeChange(src) {
    return src.previousRect.width !== src.currentRect.width
      || src.previousRect.height !== src.currentRect.height;
  }

  // Resource timing entries whose response ended strictly inside (startTime, endTime),
  // optionally restricted to names matching `regex`.
  function getLastResources(startTime, endTime, regex) {
    return performance.getEntriesByType('resource').filter((e) => {
      if (regex && !e.name.match(regex)) return false;
      return e.responseEnd < endTime && e.responseEnd > startTime;
    });
  }

  // Appends one attribution record to window.shifts and logs it.
  function record(cause, resource, entry) {
    const diff = entry.startTime - resource.responseEnd;
    window.shifts.push({url: document.location.href, cause: cause, ressource: resource.name, impact: entry.value, timegap: diff});
    console.log(resource.name + " was loaded " + diff + "ms before a layout shift with impact " + entry.value);
  }

  const po = new PerformanceObserver((list) => {
    for (const entry of list.getEntries()) {
      const lastFonts = getLastResources(entry.startTime - FONT_WINDOW_MS, entry.startTime, FONT_PATTERN);
      const lastAds = getLastResources(entry.startTime - AD_WINDOW_MS, entry.startTime, AD_PATTERN);
      const lastImgs = getLastResources(entry.startTime - IMAGE_WINDOW_MS, entry.startTime, IMAGE_PATTERN);

      // Only sources that changed size are of interest. A source's node is null once the
      // element has left the DOM, and older browsers expose no `sources` at all, so both
      // cases are filtered out here instead of throwing inside the observer callback.
      const sources = (entry.sources || []).filter((src) => src.node && didSizeChange(src));

      // if we have a text node which changed size, and a font loaded, then attribute the layout jump to the font
      // not completely true, could also be due to CSS
      if (lastFonts.length > 0) {
        console.log("Probing for fonts:");
        console.log(lastFonts);
        console.log(entry);
        const last = lastFonts[lastFonts.length - 1];
        //is there a source which is just an icon or text node?
        for (const src of sources) {
          if (src.node.nodeType === Node.TEXT_NODE || TEXT_TAGS.includes(src.node.tagName)) {
            record("FONT", last, entry);
          }
        }
      }

      // if an image was loaded, and the size of an image node with same src changed, attribute shift to the image
      if (lastImgs.length > 0) {
        console.log("Probing for images:");
        console.log(lastImgs);
        console.log(entry);
        //is there a source which is just an image node?
        for (const src of sources) {
          console.log("Probing for tagname:");
          if (!IMAGE_TAGS.includes(src.node.tagName)) continue;
          const html = src.node.outerHTML;
          // find the first recently loaded image whose file name appears in the node's markup
          const match = lastImgs.find((img) => {
            const filename = new URL(img.name).pathname.split('/').pop();
            console.log("Probing for filename: " + filename);
            // A URL ending in "/" yields an empty file name, which would match any markup.
            return filename !== '' && html.includes(filename);
          });
          if (match) record("IMAGE", match, entry);
        }
      }

      // if an ad was loaded, and the size of a container holding an ad iframe changed, attribute shift to the ad
      if (lastAds.length > 0) {
        console.log("Probing for ads:");
        console.log(lastAds);
        console.log(entry);
        const last = lastAds[lastAds.length - 1];
        //is there a source which might contain an ad?
        for (const src of sources) {
          console.log("Probing tagname");
          if (!AD_TAGS.includes(src.node.tagName)) continue;
          console.log("Probing content");
          if (src.node.outerHTML.includes("google_ads_iframe")) {
            record("ADS", last, entry);
          }
        }
      }
    }
  });
  // buffered: true replays shifts that happened before this probe was injected
  po.observe({type: 'layout-shift', buffered: true});
}
/**
 * Crawls a list of URLs under mobile emulation and a throttled network, collects the
 * layout-shift attributions produced by injectJs on each page, and renders them into
 * output/index.html using the Mustache TEMPLATE.
 *
 * Side effects: empties the "output" directory, creates "output/images", launches (and
 * always closes) a Chromium instance, and writes the report file.
 *
 * @param {string[]} urls - Candidate URLs; surrounding whitespace is trimmed and blank
 *   entries are ignored.
 * @param {number} max - Maximum number of (non-blank) URLs to process.
 * @returns {Promise<void>} Resolves once the report has been written. Failures on a
 *   single URL are logged and skipped; a failure to launch the browser rejects.
 */
async function doBatch(urls, max) {
  // Plain timer instead of page.waitFor(ms): that helper was deprecated and later removed
  // from Puppeteer, whereas this behaves the same on every version.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // reset output file and images dir
  fsExtra.emptyDirSync("output");
  fs.mkdirSync("output/images");

  // Drop blank lines / stray "\r" up front so they neither use up the `max` budget
  // nor trigger a navigation that is guaranteed to fail.
  const targets = urls
    .map((u) => String(u).trim())
    .filter((u) => u !== '')
    .slice(0, max);

  //json results object to write towards mustache at the end
  let results = [];

  const browser = await puppeteer.launch({
    args: ['--no-sandbox'],
    // debugging aid: add `headless: false` (and optionally `executablePath`) to watch the run
    timeout: 10000
  });

  try {
    const page = await browser.newPage();
    await page.emulate(phone);

    const client = await page.target().createCDPSession();
    await client.send('Network.enable');
    await client.send('ServiceWorker.enable');
    await client.send('Network.emulateNetworkConditions', Good3G);
    // optional: await client.send('Emulation.setCPUThrottlingRate', { rate: 4 });

    for (const url of targets) {
      console.log("Processing: " + url);
      try {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000});
        await sleep(2000); // let's give it a bit more time, to be sure everything's loaded

        // The observer is registered with `buffered: true`, so shifts that happened
        // during load are still delivered even though the probe is injected afterwards.
        console.log("Injecting JS...");
        await Promise.race([
          page.evaluate(injectJs),
          sleep(5000)
        ]);

        // Give the observer callback time to run before reading the results.
        // (The original call was missing its `await`, so this pause never took effect.)
        await sleep(2000);

        console.log("Gathering data...");
        // The race resolves to undefined if the page does not answer within 5 s.
        const urlResults = await Promise.race([
          page.evaluate(function() {return window.shifts}),
          sleep(5000)
        ]);
        if (!urlResults) {
          console.log("Couldn't retrieve results.");
          continue;
        }
        results = results.concat(urlResults);
      } catch (error) {
        // Best effort: one failing URL must not abort the whole batch.
        console.log(error);
      }
    }
  } finally {
    // Always release the browser process, even if setup or a page interaction threw.
    await browser.close();
  }

  // write out result html, largest impact first
  results.sort((a, b) => b.impact - a.impact);
  const rendered = mustache.render(TEMPLATE, {items: results});
  fs.writeFileSync('output/index.html', rendered);
}
// Entry point: read one URL per line from input.csv (tolerating CRLF line endings and
// blank lines), crawl up to 200 of them, and exit with a non-zero status on failure.
const urls = fs.readFileSync('input.csv', 'utf8')
  .split(/\r?\n/)
  .map((line) => line.trim())
  .filter((line) => line !== '');

doBatch(urls, 200)
  .then(() => {
    console.log("Done!");
    process.exit(0);
  })
  .catch((error) => {
    // Without this handler a rejected batch (e.g. browser launch failure) went unreported.
    console.error(error);
    process.exit(1);
  });
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<!-- Mustache template for the crawler report. Rendered with {items: [...]}, where each
     item carries url, cause, ressource, impact and timegap. -->
<html>
<head>
<meta charset="utf-8">
<title>Layout shift analysis</title>
<style>
#mainTable tr:nth-child(odd){
background-color: lightblue;
}
thead {
font-weight: bold;
background-color: lightgrey;
}
.screenshot {
margin:20px;
width:150px;
}
.heatmap {
width:400px;
}
td {
word-break:break-all;
width: 16%;
}
</style>
</head>
<!-- The body previously carried onload="renderHello()", but no such function is defined
     anywhere in this template, so every page load raised a ReferenceError. -->
<body>
<h1>Analysis results for layout shift sources</h1>
<table>
<thead>
<tr>
<td width="10%">URL</td>
<td>Cause</td>
<td>Ressource</td>
<td>CLS Impact</td>
<td>Timegap</td>
</tr>
</thead>
<tbody id="mainTable">
{{#items}}
<tr>
<td>{{url}}</td>
<td>{{cause}}</td>
<td>{{ressource}}</td>
<td>{{impact}}</td>
<td>{{timegap}}</td>
</tr>
{{/items}}
</tbody>
</table>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment