Created
December 5, 2023 11:37
-
-
Save ehzawad/149326926d38ab2ae63a087a9ab56aba to your computer and use it in GitHub Desktop.
Webpage to PDF converter (Node.js + Puppeteer)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```js
// const puppeteer = require('puppeteer'); | |
// async function convertWebpageToPDF(url, outputFilename) { | |
// const browser = await puppeteer.launch({ headless: "new" }); | |
// const page = await browser.newPage(); | |
// // Increase the timeout to 60 seconds (60000 milliseconds) | |
// await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 }); | |
// // Replace '.cookie-accept-button' with the actual selector for the cookie acceptance button | |
// const cookiesButtonSelector = '.cookie-accept-button'; | |
// if (await page.$(cookiesButtonSelector) !== null) { | |
// await page.click(cookiesButtonSelector); | |
// } | |
// // Scroll through the entire webpage | |
// await autoScroll(page); | |
// // Capture the full webpage as PDF | |
// await page.pdf({ path: outputFilename, format: 'A4', printBackground: true }); | |
// await browser.close(); | |
// console.log(`PDF created: ${outputFilename}`); | |
// } | |
// // Function to automatically scroll through the entire webpage | |
// async function autoScroll(page){ | |
// await page.evaluate(async () => { | |
// await new Promise((resolve, reject) => { | |
// let totalHeight = 0; | |
// let distance = 100; | |
// let timer = setInterval(() => { | |
// let scrollHeight = document.body.scrollHeight; | |
// window.scrollBy(0, distance); | |
// totalHeight += distance; | |
// if(totalHeight >= scrollHeight){ | |
// clearInterval(timer); | |
// resolve(); | |
// } | |
// }, 100); | |
// }); | |
// }); | |
// } | |
// // Example usage | |
// convertWebpageToPDF('https://rasa.com/docs/rasa/rules', 'rasa-rules.pdf'); | |
// const puppeteer = require('puppeteer'); | |
// async function convertWebpageToPDF(url, outputFilename) { | |
// const browser = await puppeteer.launch({ headless: "new" }); | |
// const page = await browser.newPage(); | |
// // Set a standard viewport size | |
// await page.setViewport({ width: 1366, height: 768 }); | |
// // Navigate to the page with an increased timeout | |
// await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 }); | |
// // Handle cookie consent - replace the selector with the actual one | |
// const cookiesButtonSelector = 'YOUR_COOKIE_CONSENT_SELECTOR'; // Replace with the actual selector | |
// if (await page.$(cookiesButtonSelector) !== null) { | |
// await page.click(cookiesButtonSelector); | |
// console.log('Cookie consent accepted.'); | |
// } | |
// // Scroll through the entire webpage | |
// await autoScroll(page); | |
// // Optional: Add a delay to ensure all dynamic content has stabilized | |
// await page.waitForTimeout(5000); // Wait for 5 seconds | |
// // Capture the full webpage as PDF | |
// await page.pdf({ path: outputFilename, format: 'A4', printBackground: true }); | |
// await browser.close(); | |
// console.log(`PDF created: ${outputFilename}`); | |
// } | |
// // Function to automatically scroll through the entire webpage | |
// async function autoScroll(page) { | |
// await page.evaluate(async () => { | |
// await new Promise((resolve, reject) => { | |
// let totalHeight = 0; | |
// let distance = 100; | |
// let timer = setInterval(() => { | |
// let scrollHeight = document.body.scrollHeight; | |
// window.scrollBy(0, distance); | |
// totalHeight += distance; | |
// if(totalHeight >= scrollHeight){ | |
// clearInterval(timer); | |
// resolve(); | |
// } | |
// }, 100); | |
// }); | |
// }); | |
// } | |
// // Example usage | |
// convertWebpageToPDF('https://rasa.com/docs/rasa/rules', 'rasa-rules.pdf'); | |
const puppeteer = require('puppeteer'); | |
/**
 * Render a web page to an A4 PDF file using headless Chrome via Puppeteer.
 *
 * Steps: open the URL in a 1920x1080 viewport, click the cookie-consent
 * button if one matches, scroll through the page with `autoScroll` (defined
 * in this file), wait for content to settle, then write the PDF.
 *
 * Failures after the browser has launched are logged to stderr and NOT
 * rethrown (best-effort behavior kept from the original); the browser is
 * always closed. A failure of `puppeteer.launch` itself still rejects.
 *
 * @param {string} url - Address of the page to convert.
 * @param {string} outputFilename - Path the PDF is written to.
 * @param {object} [options]
 * @param {string} [options.cookieSelector='YOUR_COOKIE_CONSENT_SELECTOR'] -
 *   CSS selector of the cookie-consent button; the default is a placeholder
 *   to be replaced with the real selector. Pass an empty string to skip.
 * @param {number} [options.settleDelayMs=5000] - Milliseconds to wait after
 *   scrolling so dynamic content can stabilize; 0 skips the wait.
 * @returns {Promise<void>}
 */
async function convertWebpageToPDF(url, outputFilename, options = {}) {
  const {
    cookieSelector = 'YOUR_COOKIE_CONSENT_SELECTOR', // Replace with the actual selector
    settleDelayMs = 5000,
  } = options;

  const browser = await puppeteer.launch({ headless: "new" });
  try {
    // Everything that can fail after launch lives inside the try so the
    // finally block below always gets a chance to close the browser.
    const page = await browser.newPage();
    // Set a wider viewport size
    await page.setViewport({ width: 1920, height: 1080 });

    // timeout: 0 disables the navigation timeout entirely.
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 0 });

    // Handle cookie consent when a matching button is present.
    if (cookieSelector && (await page.$(cookieSelector)) !== null) {
      await page.click(cookieSelector);
      console.log('Cookie consent accepted.');
    }

    // Scroll through the entire webpage
    await autoScroll(page);

    // Plain timer instead of page.waitForTimeout, which newer Puppeteer
    // releases no longer provide.
    if (settleDelayMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, settleDelayMs));
    }

    // Capture the full webpage as PDF
    await page.pdf({ path: outputFilename, format: 'A4', printBackground: true });
    // Report success only once the PDF has actually been written.
    console.log(`PDF created: ${outputFilename}`);
  } catch (error) {
    console.error(`Failed to convert ${url} to PDF: ${error.message}`);
  } finally {
    await browser.close();
  }
}
/**
 * Scroll a Puppeteer page from its current position toward the bottom in
 * fixed steps (useful for triggering content that only loads on scroll).
 *
 * Inside the page, a timer scrolls by `distance` pixels every `intervalMs`
 * milliseconds and stops once the accumulated distance reaches
 * `document.body.scrollHeight`. The height is re-read on every tick, so the
 * loop keeps going while the page grows.
 *
 * @param {object} page - Puppeteer page; only `page.evaluate` is used.
 * @param {object} [options]
 * @param {number} [options.distance=100] - Pixels scrolled per tick; must be
 *   a positive finite number.
 * @param {number} [options.intervalMs=100] - Milliseconds between ticks.
 * @returns {Promise<void>} Resolves when scrolling has finished.
 * @throws {RangeError} If `distance` is not a positive finite number (a
 *   non-positive step would never reach the bottom).
 */
async function autoScroll(page, { distance = 100, intervalMs = 100 } = {}) {
  if (!Number.isFinite(distance) || distance <= 0) {
    throw new RangeError(`distance must be a positive finite number, got ${distance}`);
  }

  // The callback runs in the browser context, so the settings are passed in
  // as an argument rather than captured from this scope.
  await page.evaluate(
    ({ step, delay }) =>
      new Promise((resolve, reject) => {
        let scrolled = 0;
        const timer = setInterval(() => {
          try {
            // Read the height before scrolling, as lazy content may extend it.
            const { scrollHeight } = document.body;
            window.scrollBy(0, step);
            scrolled += step;
            if (scrolled >= scrollHeight) {
              clearInterval(timer);
              resolve();
            }
          } catch (err) {
            // Without this, an exception in the timer callback (for example
            // a document with no body) would leave the promise pending
            // forever with the interval still running.
            clearInterval(timer);
            reject(err);
          }
        }, delay);
      }),
    { step: distance, delay: intervalMs },
  );
}
// Example usage. The promise is handled explicitly so that a failure that
// escapes the converter (for example the browser failing to launch) is
// reported with a non-zero exit code instead of an unhandled rejection.
convertWebpageToPDF('https://rasa.com/docs/rasa/rules', 'rasa-rules.pdf').catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment