Last active
September 16, 2022 07:43
-
-
Save zerebral/e8b985bbaacc166cee39ee750c64090d to your computer and use it in GitHub Desktop.
Puppeteer | Wait till all content is loaded in the browser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Polls the page's serialized HTML until its length stops changing, or until
 * the timeout budget is exhausted.
 *
 * The page counts as rendered once the HTML length is non-zero and identical
 * to the previous sample for three consecutive checks.
 *
 * @param {object} page - Puppeteer-style page exposing `content()` and `evaluate()`.
 * @param {number} [timeout=30000] - Total polling budget in milliseconds.
 * @param {number} [checkDurationMsecs=1000] - Delay between two checks in milliseconds.
 * @returns {Promise<boolean>} `true` if the HTML size stabilised, `false` if the
 *   check budget ran out first.
 * @throws {RangeError} If `checkDurationMsecs` is not a positive number.
 */
const waitTillHTMLRendered = async (page, timeout = 30000, checkDurationMsecs = 1000) => {
  if (!(checkDurationMsecs > 0)) {
    throw new RangeError('checkDurationMsecs must be a positive number');
  }

  const minStableSizeIterations = 3;
  const maxChecks = timeout / checkDurationMsecs;
  // Plain timer-based sleep: does not depend on page.waitFor, which is
  // deprecated and absent from newer Puppeteer releases.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  let lastHTMLSize = 0;
  let countStableSizeIterations = 0;

  for (let checkCount = 1; checkCount <= maxChecks; checkCount++) {
    const html = await page.content();
    const currentHTMLSize = html.length;
    // document.body can be null (e.g. before the body element exists); report 0 then.
    const bodyHTMLSize = await page.evaluate(
      () => (document.body ? document.body.innerHTML.length : 0)
    );
    console.log('last: ', lastHTMLSize, ' <> curr: ', currentHTMLSize, " body html size: ", bodyHTMLSize);

    if (lastHTMLSize !== 0 && currentHTMLSize === lastHTMLSize) {
      countStableSizeIterations++;
    } else {
      countStableSizeIterations = 0; // size changed (or first sample): restart the streak
    }

    if (countStableSizeIterations >= minStableSizeIterations) {
      console.log("Page rendered fully..");
      return true;
    }

    lastHTMLSize = currentHTMLSize;
    await sleep(checkDurationMsecs);
  }

  return false;
};
module.exports = async ({ page }) => { | |
//let url = "https://cpo.ford.com/Search?LeadSource=FordDirectUsed&AltLeadSource=FordCPO&ResultsPerPage=25&Distance=25&ZipCode=94203&Make=Ford&Model=&ModelYearLow=&ModelYearHigh=&PriceHigh=&Page=1&SortBy=distance&SortByDescending=false#"; | |
let url = "https://www.autoscout24.com/lst/?sort=price&desc=0&ustate=N%2CU&size=20&page=1&atype=C&"; | |
await page.goto(url, {'timeout': 10000, 'waitUntil':'networkidle2'}); | |
await waitTillHTMLRendered(page) | |
const data = await page.content() | |
console.log("Final page content size : ", data.length) | |
return {data, type:"application/html"} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Since JS scripts may still be executing even after the networkidle2 event has fired, perhaps the best way to tell whether all JS-based rendering has finished on the web page is to keep an eye on the modifications to the HTML source that the browser is manipulating?
The function waitTillHTMLRendered watches the rendered HTML content length and returns only when the HTML source length becomes stable or the specified timeout elapses. You can quickly try this out here to see how it's doing - https://chrome.browserless.io/#