Created
December 14, 2015 10:20
-
-
Save JonathanMH/876e21777402357e449f to your computer and use it in GitHub Desktop.
crawl a page a bunch of times
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require('fs'); | |
var async = require('async'); | |
var request = require('request'); | |
var cheerio = require('cheerio'); | |
var _ = require('lodash'); | |
// Page that is fetched on every crawl.
var url = 'http://programmingexcuses.com';
// Text scraped from each successful crawl, in arrival order (duplicates included).
var excuses = [];

/**
 * Fetches `url` once and appends the text of its `.wrapper center` element
 * to the module-level `excuses` array.
 *
 * @param {function(Error=)} callback - Invoked when the request completes:
 *   with no arguments on success, or with an Error when the request fails
 *   or the server responds with a status other than 200. Reporting failures
 *   (instead of staying silent) lets a caller that loops on this function
 *   keep going rather than wait forever.
 */
module.exports.crawl = function (callback) {
  request(url, function (error, response, body) {
    console.log('crawling');

    if (error) {
      return callback(error);
    }
    if (response.statusCode !== 200) {
      return callback(
        new Error('Unexpected status code ' + response.statusCode + ' from ' + url)
      );
    }

    // Declared locally so the parsed document does not leak as a global.
    var $ = cheerio.load(body);
    excuses.push($('.wrapper center').text());
    callback();
  });
};
// How many times the page is requested, and the pause between requests.
var MAX_CRAWLS = 400;
var CRAWL_DELAY_MS = 100;

// Number of crawl attempts completed so far (successful or not).
var count = 0;

// Crawl the page MAX_CRAWLS times in sequence, then print the distinct
// excuses that were collected.
async.whilst(
  function () {
    return count < MAX_CRAWLS;
  },
  function (callback) {
    module.exports.crawl(function (err) {
      if (err) {
        // A single failed request should not abort the whole run; report it
        // and count the attempt so the loop still terminates.
        console.error('crawl failed:', err.message || err);
      }
      count++;
      // setTimeout invokes `callback` with no arguments, so the loop
      // continues after the pause.
      setTimeout(callback, CRAWL_DELAY_MS);
    });
  },
  function (err) {
    if (err) {
      console.error(err);
      return;
    }
    // `_.uniq` is the canonical name; `_.unique` is only an alias in lodash 3.
    console.log(_.uniq(excuses));
  }
);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "excuses",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "dependencies": {
    "async": "^1.5.0",
    "cheerio": "^0.19.0",
    "lodash": "^3.10.1",
    "request": "^2.67.0"
  },
  "devDependencies": {},
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment