Last active
February 14, 2019 07:49
-
-
Save Erushenko/6641ab1a790237209e2e7cd131278a97 to your computer and use it in GitHub Desktop.
#bluebird #promise #parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const cheerio = require("cheerio");
const Promise = require("bluebird");
const request = Promise.promisify(require("request"));
const csvStringify = require('csv-stringify');
const fs = require('fs');

// Forum index page whose topic links are scraped.
const URL = 'https://dou.ua/forums';

// Pipeline: fetch the index page, extract the topic list, visit every topic
// page to fill in its comment count, then serialize the result to CSV.
// Bluebird's mapSeries processes the topics one after another.
request(URL)
  .then(topics)
  .mapSeries(topic => request(topic.url).then(res => topicContents(res, topic)))
  // The topic objects are handed to the writer as-is (not flattened to value
  // arrays): the writer asks for `header: true`, and csv-stringify derives
  // header names from object keys or an explicit `columns` option.
  // NOTE(review): confirm against the installed csv-stringify version.
  .then(writeFileCsv)
  .catch(err => {
    console.log("Error", err);
  });
/**
 * Extracts the forum topics listed on the index page.
 *
 * @param {{body: string}} res - HTTP response; `body` is parsed as HTML.
 * @returns {Array<{title: string, url: string, commentsCount: number}>}
 *   One entry per matched topic link, built from the link's `title` and
 *   `href` attributes. `commentsCount` starts at 0.
 */
function topics(res) {
  const $ = cheerio.load(res.body);
  return $("div.b-forum-articles article > h2 > a:first-child")
    .map((i, link) => ({
      title: link.attribs.title,
      url: link.attribs.href,
      commentsCount: 0,
    }))
    .toArray();
}
/**
 * Reads the comment count from a topic page and stores it on the topic.
 *
 * @param {{body: string}} res - HTTP response of the topic page; `body` is parsed as HTML.
 * @param {{commentsCount: number}} topic - Topic record; its `commentsCount` is
 *   overwritten in place.
 * @returns {object} The same `topic` object that was passed in.
 */
function topicContents(res, topic) {
  const $ = cheerio.load(res.body);
  // Explicit radix 10 so the text is never interpreted as hexadecimal.
  const commentsCount = Number.parseInt($('#lblCommentsCount').text(), 10);
  // Fall back to 0 when the element text does not start with a number.
  topic.commentsCount = Number.isNaN(commentsCount) ? 0 : commentsCount;
  return topic;
}
/**
 * Serializes `data` to semicolon-delimited CSV and writes it to a
 * timestamped file under ./tmp (the name comes from getNameFile).
 *
 * Failures from serialization or from the file write are logged, not thrown.
 *
 * @param {Array} data - Records to pass to csv-stringify.
 * @returns {void}
 */
function writeFileCsv(data) {
  const pad = (value, width) => String(value).padStart(width, '0');

  csvStringify(
    data,
    {
      delimiter: ';',
      header: true,
      // NOTE(review): `formatters` is the option name used by the original
      // code; newer csv-stringify releases call it `cast` - confirm against
      // the installed version.
      formatters: {
        // Local-time YYYY-MM-DD, built with the standard library because
        // `moment` is not required anywhere in this file.
        date: it => `${pad(it.getFullYear(), 4)}-${pad(it.getMonth() + 1, 2)}-${pad(it.getDate(), 2)}`,
        bool: it => (it ? 'X' : ''),
      },
    },
    (err, output) => {
      if (err) {
        // Without this guard a failed serialization would go on to write
        // an undefined `output`.
        console.log('Failed to build CSV output.', err);
        return;
      }
      // fs.writeFile (not writeFileSync) so the completion callback actually
      // runs; the synchronous variant never invokes a callback.
      fs.writeFile(getNameFile('./tmp/some-parse'), output, writeErr => {
        if (writeErr) {
          console.log('Some error occurred - file either not saved or corrupted file saved.', writeErr);
        } else {
          console.log('It\'s saved!');
        }
      });
    }
  );
}
/**
 * Builds a CSV file name of the form `<prefix>-<year>-<month>-<day>-<hour>.csv`
 * using local time. Month is 1-based; components are not zero-padded.
 *
 * @param {string} prefix - Leading part of the file name (may include a path).
 * @param {Date} [now=new Date()] - Moment to stamp the name with. A single
 *   snapshot is used so all components come from the same instant.
 * @returns {string} The generated file name.
 */
function getNameFile(prefix, now = new Date()) {
  const parts = [
    prefix,
    now.getFullYear(),
    now.getMonth() + 1, // getMonth() is 0-based
    now.getDate(),
    now.getHours(),
  ];
  return `${parts.join('-')}.csv`;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment