Skip to content

Instantly share code, notes, and snippets.

@nandomoreirame
Last active February 17, 2019 21:13
Show Gist options
  • Select an option

  • Save nandomoreirame/783878c235c3c6efd6fa543ffd1da5b5 to your computer and use it in GitHub Desktop.

Select an option

Save nandomoreirame/783878c235c3c6efd6fa543ffd1da5b5 to your computer and use it in GitHub Desktop.
node crawler pensador.com
{
"name": "html-to-text",
"version": "0.0.1",
"description": "",
"main": "node.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "Fernando Moreira <[email protected]> (https://nandomoreira.me)",
"license": "MIT",
"dependencies": {
"cheerio": "^1.0.0-rc.2",
"request-promise": "^4.2.4"
}
}
'use strict'
const rp = require('request-promise')
const $ = require('cheerio')
const potusParse = require('./potusParse')
const baseUrl = 'https://www.pensador.com'
// Upper bound on how many phrase pages are followed from the listing page.
const MAX_PAGES = 20

// Crawl the motivational-phrases listing: download it, collect the links to
// the individual phrase pages, parse each page with potusParse, and print the
// collected results. Any failure in the chain is logged to stderr.
rp(`${baseUrl}/frases_de_motivacao/`)
  .then(html => {
    // Parse the listing once and reuse the selection; the markup does not
    // change between reads, so re-querying it for every link is wasted work.
    const pages = $.load(html)('.linkDetailImage')
      .toArray()
      .slice(0, MAX_PAGES)
      .map(linkNode => linkNode.attribs.href)
      // Skip anchors without an href so no request goes to "<baseUrl>undefined".
      .filter(href => typeof href === 'string')

    // Fetch and parse all phrase pages in parallel.
    return Promise.all(pages.map(url => potusParse(`${baseUrl}${url}`)))
  })
  .then(phrases => {
    console.log(phrases)
  })
  .catch(err => console.error(err))
const rp = require('request-promise')
const $ = require('cheerio')
/**
 * Downloads a single phrase page and extracts its quote and author.
 *
 * @param {string} url - URL of the phrase page to download.
 * @returns {Promise<{quote: string, author: string}|{}|undefined>} Resolves
 *   with the extracted pair, with an empty object when either text is
 *   missing, or with undefined when the request or parsing fails (the error
 *   is logged instead of being rethrown).
 */
const potusParse = url =>
  rp(url)
    .then(html => {
      // Parse the document once and query it twice, rather than handing the
      // raw HTML string to cheerio for every selector.
      const page = $.load(html)
      const quote = page('.pensaFrase > .fr').text()
      const author = page('.pensaFrase > .autor > a').text()
      // .text() always yields a string, so the empty-string check is enough.
      if (quote === '' || author === '') {
        return {}
      }
      return { quote, author }
    })
    // Best-effort: log and resolve with undefined so a single failed page
    // does not reject a caller that awaits many pages together.
    .catch(err => console.error(err))
module.exports = potusParse
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment