Created
December 15, 2018 15:37
-
-
Save blackjacques/b83fecc6471a0b527b8a48d45ccc214c to your computer and use it in GitHub Desktop.
Demo for the htmlgoodies.com article "Web Page Scraping with Node.js"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer-core'); | |
/**
 * Quotes an arbitrary string as an XPath 1.0 string literal.
 *
 * XPath 1.0 has no escape character inside literals, so a value holding both
 * quote kinds has to be assembled with concat().
 *
 * @param {string} value - Text to embed in an XPath expression.
 * @returns {string} An XPath expression that evaluates to `value`.
 */
const toXPathLiteral = (value) => {
  const str = String(value);
  if (!str.includes("'")) {
    return `'${str}'`;
  }
  if (!str.includes('"')) {
    return `"${str}"`;
  }
  // Both quote kinds present: single-quote every piece and re-insert the
  // apostrophes as double-quoted literals between them.
  const pieces = str.split("'").map((piece) => `'${piece}'`);
  return `concat(${pieces.join(`, "'", `)})`;
};

/**
 * Clicks the first anchor whose class attribute contains 'gs-title' and whose
 * text contains `text`. Logs a message instead of throwing when nothing matches.
 *
 * @param {object} page - Object exposing an async `$x(xpath)` method that
 *   resolves to an array of handles with an async `click()` method.
 * @param {string} text - Substring to look for in the link text.
 * @returns {Promise<void>} Resolves once the click (or the log call) is done.
 */
const clickByText = async (page, text) => {
  // The literal is quoted by toXPathLiteral so an apostrophe in `text` cannot
  // break out of the expression.
  const query = `//a[contains(@class,'gs-title')][contains(text(), ${toXPathLiteral(text)})]`;
  const links = await page.$x(query);
  if (links.length > 0) {
    await links[0].click();
  } else {
    console.log(`Link not found: ${text}`);
  }
};
// Drives a visible Chrome window through the htmlgoodies.com site search for
// 'web scraping', opens one result and saves a full-page screenshot of it.
puppeteer.launch({
  // Chrome binary to launch (hard-coded Windows install location).
  executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
  headless: false,
}).then(async (browser) => {
  // Reuse the first page the browser reports instead of opening a new one.
  const page = (await browser.pages())[0];
  await page.setViewport({ width: 1280, height: 800 });
  await page.goto('https://www.htmlgoodies.com/');

  // Dismiss the overlay, then pause before interacting with the search box.
  await page.click('.zz_overlay_close');
  await page.waitFor(1000);

  await page.click('#search-input');
  await page.keyboard.type('web scraping');
  // Start waiting for the navigation before clicking: awaiting the click
  // first can miss a navigation that completes in between.
  await Promise.all([
    page.waitForNavigation(),
    page.click('#search-submit'),
  ]);

  // The overlay is dismissed again on the results page.
  await page.click('.zz_overlay_close');
  await page.waitFor(1000);

  // Register the 'targetcreated' listener before clicking so the new target
  // is not missed. NOTE(review): if the link is not found, clickByText only
  // logs, no target is created and this promise never settles.
  const articlePagePromise = new Promise((resolve) => {
    browser.once('targetcreated', (target) => resolve(target.page()));
  });
  await clickByText(page, 'JavaScript Rendered Content');
  const articlePage = await articlePagePromise;

  // Fixed pause before capturing — presumably to let the article render.
  await articlePage.waitFor(2000);
  await articlePage.screenshot({ path: 'web_scraping_article.png', fullPage: true });
  console.log('All done!');
  // NOTE(review): the close call is commented out, so the browser window
  // stays open after the run — presumably on purpose; confirm.
  // browser.close();
}).catch((err) => {
  // Surface launch/automation failures instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment