Created
October 19, 2020 04:15
-
-
Save leonardorifeli/b1ae4c35f9b1edaca7f6d2db7d78a091 to your computer and use it in GitHub Desktop.
Puppeteer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer'); | |
/**
 * Scraper for www.piadas.com.br built on Puppeteer.
 *
 * All members are static; the class is used as a namespace. Every public
 * method launches its own headless browser through `getBrowser()` and always
 * closes it again, even when a scraping step fails.
 */
class Jokes {
  /**
   * Opens the site, follows the menu entry `#menu-4009-1` and collects the
   * joke links found under `.botao_branco > a`. The first five links are then
   * visited one by one (via `getData`) to read each joke's author and date.
   *
   * @returns {Promise<Array<{author: string, date: string}>>} One entry per
   *   visited joke, in the order the links appear on the page.
   * @throws {Error} Whatever Puppeteer raises on navigation/selector failure;
   *   the browser is closed before the error propagates.
   */
  static async getBestData() {
    const browser = await this.getBrowser();

    try {
      const page = (await browser.pages())[0];
      await this._definePageConfig(page);

      console.info('Accessing www.piadas.com.br.');
      await page.goto('https://www.piadas.com.br', {waitUntil: 'networkidle2'});
      await page.waitForSelector('#menu-4009-1');
      await page.click('#menu-4009-1 > a');

      console.info('Accessing best jokes.');
      await page.waitForSelector('.botao_branco > a');
      const urls = await page.$$eval(
        '.botao_branco > a',
        (links) => links.filter((link) => link.href).map((link) => link.href),
      );

      console.info('Getting best five authors by single joke url.');
      const jokesData = [];
      // slice (not splice) keeps `urls` intact; 5 matches the "best five" above.
      for (const url of urls.slice(0, 5)) {
        const {author, date} = await this.getData(url);
        jokesData.push({author, date});
      }

      console.info('Finished: ');
      return jokesData;
    } finally {
      // Runs on success and on failure so no Chromium process is left behind.
      await browser.close();
    }
  }

  /**
   * Visits a single joke page in a fresh browser and reads the author link
   * text and the post-date field text.
   *
   * @param {string} url - Absolute URL of the joke page.
   * @returns {Promise<{author: string, date: string}>} Raw `textContent` of
   *   the author link and of the `.field-name-post-date` element.
   * @throws {Error} If navigation fails or either element is missing; the
   *   browser is closed before the error propagates.
   */
  static async getData(url) {
    const browser = await this.getBrowser();

    try {
      const page = (await browser.pages())[0];
      await this._definePageConfig(page);
      await page.goto(url);

      // $eval rejects with the offending selector when nothing matches,
      // instead of failing inside the page on a null element.
      const readText = (element) => element.textContent;
      const author = await page.$eval('a[title="Ver perfil do usuário."]', readText);
      const date = await page.$eval('.field-name-post-date', readText);

      return {author, date};
    } finally {
      await browser.close();
    }
  }

  /**
   * Launches a headless browser.
   *
   * @returns {Promise<object>} The browser returned by `puppeteer.launch`.
   */
  static async getBrowser() {
    return puppeteer.launch({headless: true});
  }

  /**
   * Applies the shared page setup: a 1280x720 viewport and request
   * interception that aborts image, stylesheet, font and script requests
   * while letting every other request continue.
   *
   * @param {object} page - Page to configure; must provide `setViewport`,
   *   `setRequestInterception` and `on`.
   * @returns {Promise<void>} Resolves once viewport and interception are set.
   */
  static async _definePageConfig(page) {
    // Awaited so a viewport failure surfaces here instead of as an
    // unhandled rejection.
    await page.setViewport({width: 1280, height: 720});
    await page.setRequestInterception(true);

    const blockedResourceTypes = ['image', 'stylesheet', 'font', 'script'];
    page.on('request', (request) => {
      if (blockedResourceTypes.includes(request.resourceType())) {
        request.abort();
      } else {
        request.continue();
      }
    });
  }
}
// Module-level handle used by the entry script below.
const jokes = Jokes;

// Entry point: fetch the best jokes' authors and print one line per entry.
// Any failure is reported on stderr instead of propagating.
(async () => {
  try {
    const bestJokers = await jokes.getBestData();
    for (const bestJoker of bestJokers) {
      console.log(`- Author: ${bestJoker.author} published on ${bestJoker.date}`);
    }
  } catch (e) {
    console.error(`Error to parse best jokers on www.piadas.com.br: ${e.message}`);
  }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Result (on now):