@petulla
Last active January 23, 2019 19:45
Generic Promise.all + Axios scraper for fetching HTML content
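The script builds one URL for each monthly archive page of visualisingdata.com from 2016 through 2018, fetches all of them concurrently with Promise.all, pulls the text of every paragraph matching the configured selector out of each page with cheerio, and writes the flattened result to kirk.csv with indian-ocean.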
// dependencies: CSV writing, HTML parsing, HTTP requests
const io = require('indian-ocean');
const cheerio = require('cheerio');
const axios = require('axios');
// config: output file and CSS selector for the paragraphs to extract
const fileName = 'kirk.csv';
const selector = '.row > div > p';
// url generation: one archive URL per month of 2016–2018
const months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
const years = [2016, 2017, 2018];
const urls = years
  .map(y => months.map(m => `http://www.visualisingdata.com/${y}/${m}/`))
  .reduce((a, v) => a.concat(v)); // flatten the year/month grid into one list
// error handlers: one logs only the message, the other the full error
const errLog = (err) => console.log(err.message);
const errs = (err) => console.log(err);
// parse one page's HTML and collect the text of every matching paragraph
const gatherResults = (html) => {
  const $ = cheerio.load(html);
  const data = [];
  $(selector).each((i, elem) => {
    data.push({
      text: $(elem).text()
    });
  });
  return data;
};
// fetch one page; resolve to its extracted paragraphs, or [] on failure
const gatherBlog = (page) => {
  return axios.get(page)
    .then(res => res.status === 200 ? gatherResults(res.data) : [])
    .catch(err => {
      errs(err);
      return []; // resolve to an empty array so Promise.all still fulfills
    });
};
// write the flattened rows to CSV; indian-ocean infers the format from the extension
const write = (flatJson) => io.writeData(fileName, flatJson, {}, errs);
(async function scrape() {
  // scrape: fetch all archive pages concurrently
  const datasets = await Promise.all(urls.map(url => gatherBlog(url))).catch(errLog);
  // flatten the per-page arrays into a single list of rows
  const data = [].concat.apply([], datasets);
  // write
  write(data);
})();
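To run it, a minimal setup, assuming the script is saved as scrape.js (the file name is my assumption, not part of the gist): install the dependencies with npm install indian-ocean cheerio axios, then execute node scrape.js. The scraped paragraph text lands in kirk.csv in the working directory.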