Skip to content

Instantly share code, notes, and snippets.

@giacecco
Created May 4, 2014 15:46
Show Gist options
  • Save giacecco/428b07812ae3a9b0e75b to your computer and use it in GitHub Desktop.
Save giacecco/428b07812ae3a9b0e75b to your computer and use it in GitHub Desktop.
var async = require('async'),
cheerio = require('cheerio'),
request = require('request'),
argv = require('yargs').argv,
_ = require('underscore');
// Search URL on journalisted.com for the query given on the command line:
// all positional arguments are joined with single spaces and URL-encoded.
// Page-specific requests append "&p=<pageNo>" to this value.
var baseUrl = [
    "http://journalisted.com/search?q=",
    encodeURIComponent(argv._.join(' ')),
    "&o=date&type=article"
].join("");
/**
 * Requests the search URL without a page parameter and reads the total number
 * of result pages from the paginator element of the returned HTML.
 *
 * @param {function(?Error, number=)} callback - Called with the request error
 *   if the HTTP request failed; otherwise with null and the page count. The
 *   count is the last space-separated token of the paginator text, parsed as
 *   a base-10 integer.
 */
var getNoOfPages = function (callback) {
    request(baseUrl, function (err, response, body) {
        if (err) {
            // No body to parse: report the failure instead of handing
            // undefined to cheerio.
            return callback(err);
        }
        var $ = cheerio.load(body),
            paginatorText = $("#content div.main div.search-results div.paginator").text(),
            // NOTE(review): this is NaN when the paginator is absent or its
            // text does not end in a number; callers receive NaN as-is.
            noOfPages = parseInt(_.last(paginatorText.split(' ')), 10);
        callback(null, noOfPages);
    });
};
/**
 * Fetches one page of search results and extracts an entry for each result
 * list item.
 *
 * @param {number} pageNo - Page index, sent as the "p" query parameter.
 * @param {function(?Error, Array=)} callback - Called with the request error
 *   if the HTTP request failed; otherwise with null and an array of
 *   { permalink, srcorg, pubdate } objects, one per list item.
 */
var getPageResults = function (pageNo, callback) {
    request(baseUrl + "&p=" + pageNo, function (err, response, body) {
        if (err) {
            // No body to parse: report the failure instead of handing
            // undefined to cheerio.
            return callback(err);
        }
        var $ = cheerio.load(body),
            entries = $("#content div.main div.search-results div.body ul li").map(function (i, elem) {
                return {
                    'permalink': $("a", elem).attr("href"),
                    'srcorg': $("span.publication", elem).text(),
                    // An item without a parsable "title" attribute yields an
                    // Invalid Date here.
                    'pubdate': new Date($("abbr.published", elem).attr("title"))
                };
            }).get();
        callback(null, entries);
    });
};
/**
 * Collects the entries of every result page into one flat array.
 *
 * Pages 0 .. noOfPages - 1 are fetched one after another via async.reduce and
 * their entries are appended in page order.
 *
 * @param {function(?Error, Array=)} callback - Called with the first error
 *   raised while reading the page count or any page; otherwise with the
 *   concatenated entries.
 */
var getResults = function (callback) {
    getNoOfPages(function (err, noOfPages) {
        if (err) {
            // Without a page count there is nothing to iterate over.
            return callback(err);
        }
        // NOTE(review): assumes the site's "p" parameter is zero-based — confirm.
        async.reduce(_.range(0, noOfPages), [ ], function (memo, pageNo, next) {
            getPageResults(pageNo, function (err, results) {
                if (err) {
                    return next(err);
                }
                next(null, memo.concat(results));
            });
        }, callback);
    });
};
// Entry point: run the search and print every collected entry. On failure,
// report the error on stderr and make the process exit with a non-zero status
// instead of printing `undefined`.
getResults(function (err, entries) {
    if (err) {
        console.error(err);
        process.exitCode = 1;
        return;
    }
    console.log(entries);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment