Last active
August 29, 2015 14:02
-
-
Save yrezgui/224154e642951d96e834 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| var mongodb = require('mongodb'); | |
| var client = mongodb.MongoClient; | |
| var request = require('request'); | |
| var async = require('async'); | |
// Jobs endpoint of the api.angel.co API; the crawler requests it once without
// a page to read `last_page`, then once per page via the `page` query parameter.
| var API_JOBS_URL = 'https://api.angel.co/1/jobs'; | |
// Concurrency value handed to `async.queue` for the page-fetching workers.
| var MAX_CONCURRENT_WORKERS = 5; | |
/**
 * Opens a connection to the local `shigoto` MongoDB database.
 *
 * @param {Function} callback - Node-style callback invoked as `(error, db)`.
 *   On failure it receives the error and `null`; on success it receives
 *   `null` and the database handle provided by the driver.
 */
function databaseConnect(callback) {
  var connectionUri = 'mongodb://127.0.0.1:27017/shigoto';

  client.connect(connectionUri, function onConnect(connectError, database) {
    if (!connectError) {
      callback(null, database);
      return;
    }
    callback(connectError, null);
  });
}
/**
 * Crawls every page of the jobs API and stores the returned jobs in the
 * `jobs` collection of the given database.
 *
 * A first request (without a page) reads `last_page` from the response, then
 * one task per page is pushed onto an `async.queue` that fetches the page,
 * renames each job's `id` to `_id`, and inserts the page's jobs.
 *
 * @param {Object} db - Database handle exposing
 *   `collection(name).insert(docs, callback)`.
 * @param {Function} finalCallback - Node-style callback, invoked exactly once:
 *   `(error)` with the first failure encountered, or
 *   `(null, 'all items have been processed')` on success.
 */
function crawler(db, finalCallback) {
  var DONE_MESSAGE = 'all items have been processed';
  var firstError = null;
  var finished = false;

  // Reports the outcome once; later calls (e.g. a late drain) are ignored.
  function finish(error, result) {
    if (finished) {
      return;
    }
    finished = true;
    if (error) {
      finalCallback(error);
    } else {
      finalCallback(null, result);
    }
  }

  // Push callback for queued tasks: without it, task errors would be dropped.
  function rememberError(error) {
    if (error && !firstError) {
      firstError = error;
    }
  }

  var worker = async.queue(function processPage(task, callback) {
    request({
      url: task.url,
      qs: {page: task.page},
      method: 'GET',
      json: true
    }, function onPage(error, response, body) {
      if (error) {
        return callback(error);
      }
      // Error responses may not carry a `jobs` array; fail the task instead
      // of throwing a TypeError inside the request callback.
      if (!body || !Array.isArray(body.jobs)) {
        return callback(new Error('Unexpected response for page ' + task.page));
      }
      // Nothing to store for an empty page; skip the insert call entirely.
      if (body.jobs.length === 0) {
        console.log('Page ' + task.page + ' processed');
        return callback();
      }

      // Use the API's job id as the document's primary key.
      body.jobs.forEach(function(job) {
        job._id = job.id;
        delete job.id;
      });

      db.collection('jobs').insert(body.jobs, function onInsert(insertError) {
        if (insertError) {
          return callback(insertError);
        }
        console.log('Page ' + task.page + ' processed');
        callback();
      });
    });
  }, MAX_CONCURRENT_WORKERS);

  // Runs when the last queued task has completed, whether or not it failed.
  worker.drain = function drain() {
    finish(firstError, DONE_MESSAGE);
  };

  // The first request tells us how many pages there are.
  request({
    url: API_JOBS_URL,
    method: 'GET',
    json: true
  }, function onFirstPage(error, response, body) {
    if (error) {
      // Throwing here would be uncatchable; hand the error to the caller.
      return finish(error);
    }

    var lastPage = body ? parseInt(body.last_page, 10) : NaN;
    if (isNaN(lastPage)) {
      return finish(new Error('Unexpected response from ' + API_JOBS_URL + ': missing last_page'));
    }
    // With no pages nothing is queued and drain never fires, so finish now.
    if (lastPage < 1) {
      return finish(null, DONE_MESSAGE);
    }

    for (var page = lastPage; page >= 1; page--) {
      worker.push({ url: API_JOBS_URL, page: page }, rememberError);
    }
  });
}
// Database handle of the current run, kept so the connection can be closed
// once the crawl ends; left open, its sockets keep the process alive.
var openDatabase = null;

/**
 * Waterfall step that connects through `databaseConnect` and remembers the
 * resulting handle before passing it on to the next step.
 *
 * @param {Function} next - Waterfall continuation, invoked as `(error, db)`.
 */
function connectAndRemember(next) {
  databaseConnect(function onConnect(error, db) {
    openDatabase = db;
    next(error, db);
  });
}

// Entry point: connect, crawl, log the outcome, then release the connection.
async.waterfall([connectAndRemember, crawler], function finish(error, result) {
  console.log('enfin');
  console.log(error, result);

  // `openDatabase` stays null when the connection itself failed.
  if (openDatabase) {
    openDatabase.close(function onClose(closeError) {
      if (closeError) {
        console.error(closeError);
      }
    });
  }
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment