Last active
August 29, 2015 14:05
-
-
Save alanrsoares/90641ddf082dcc78c821 to your computer and use it in GitHub Desktop.
Simple crawler that fetches questions from the aa.co.nz quiz and stores them locally in a JSON database
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| (function() { | |
| 'use strict'; | |
| var _ = require('lodash'); | |
/**
 * Questions: a small store for quiz questions, backed by the 'questions'
 * collection of the lowdb database in db.json, with an in-memory copy used
 * for reads and duplicate checks.
 *
 * Public members:
 *   value()                -> array of every known question
 *   size()                 -> number of known questions
 *   contains(question)     -> true when a known question matches `question`
 *   addNewQuestion(q)      -> stores `q` unconditionally
 *   addNewQuestions(list)  -> stores the not-yet-known entries of `list`,
 *                             returns how many were added
 */
var Questions = (function() {
  var low = require('lowdb');
  var db = low('db.json');

  // The insert() call in addNewQuestion relies on this mixin.
  low.mixin(require('underscore.db'));

  var questionsCache = [];
  // An explicit flag is used instead of "cache is non-empty" so that a
  // question pushed before the first read cannot make the cache look loaded
  // and hide what is already stored in the database.
  var isCacheLoaded = false;

  /** Returns the lowdb wrapper for the 'questions' collection. */
  function questions() {
    return db('questions');
  }

  /**
   * Returns the cached list of questions, filling the cache from the
   * database on first use.
   */
  function value() {
    if (!isCacheLoaded) {
      questionsCache = [].concat(questions().value() || []);
      isCacheLoaded = true;
    }
    return questionsCache;
  }

  /** Returns the number of known questions. */
  function size() {
    return value().length;
  }

  /**
   * Tells whether a known question matches `question`.
   * `question` is handed to lodash as an object predicate.
   */
  function contains(question) {
    // _.some is available in lodash 3 and 4; the _.any alias is lodash 3 only.
    return _.some(value(), question);
  }

  /**
   * Stores `newQuestion` in both the cache and the database without checking
   * for duplicates.
   */
  function addNewQuestion(newQuestion) {
    // Go through value() so the cache is loaded before it is extended.
    value().push(newQuestion);
    questions().insert(newQuestion);
  }

  /**
   * Stores every entry of `newQuestions` that is not already known.
   *
   * @param {Array} newQuestions candidate questions; anything that is not an
   *   array is treated as "nothing to add".
   * @returns {number} how many questions were actually added.
   */
  function addNewQuestions(newQuestions) {
    if (!Array.isArray(newQuestions)) {
      return 0;
    }

    var added = 0;
    newQuestions.forEach(function(question) {
      if (!contains(question)) {
        addNewQuestion(question);
        added++;
      }
    });
    return added;
  }

  return {
    value: value,
    size: size,
    addNewQuestion: addNewQuestion,
    addNewQuestions: addNewQuestions,
    contains: contains
  };
}());
/**
 * QuestionsCrawler: requests question sets from the quiz endpoint over and
 * over, hands each set to a question store, and stops once a limit is hit.
 *
 * Usage: new QuestionsCrawler(store).fetchQuestions();
 * `store` must provide size() and addNewQuestions(list), the latter returning
 * the number of questions it added.
 *
 * NOTE: the store reference and the empty-attempt counter live in this
 * module's closure, so they are shared by every instance; the most recently
 * constructed instance decides which store is used.
 */
var QuestionsCrawler = (function() {
  // dependencies
  var http = require('http');

  // configuration
  var config = {
    endpoint: 'http://www.aa.co.nz/RoadCodeQuizController/getSet',
    // Give up after this many consecutive responses that added nothing new.
    maxEmptyAttempts: 50,
    // Stop once the store holds this many questions.
    // NOTE(review): presumably the total number of questions the quiz
    // offers - confirm.
    maxQuestions: 222
  };

  // fields
  var emptyAttempts = 0;
  var questions;

  // constructor
  function questionsCrawler(questionsRef) {
    questions = questionsRef;
  }

  // private members

  /** True when crawling should stop. */
  function hasReachedLimit() {
    return emptyAttempts >= config.maxEmptyAttempts ||
      questions.size() >= config.maxQuestions;
  }

  function logLimitReached() {
    console.log('No more questions available on the server');
  }

  /**
   * Logs how many questions were just added (when `added` is truthy) and how
   * many the store holds in total.
   */
  function logAvailableQuestions(added) {
    if (added) {
      console.log('%d new question%s added.', added, added > 1 ? 's' : '');
    }
    console.log('%d questions available in the database.', questions.size());
  }

  /**
   * Logs the outcome of one response: the store totals when something was
   * added, otherwise the running count of empty attempts.
   */
  function logProgress(attempts, added) {
    if (!attempts) {
      logAvailableQuestions(added);
      return;
    }
    console.log('Empty attempt #%d', attempts);
  }

  function onError(error) {
    console.log('Got error: ' + error.message);
  }

  /**
   * Handles one complete response body: parses it, stores new questions,
   * updates the empty-attempt counter and either stops or fetches again.
   * A body that is not a JSON array is reported through onError and ends the
   * crawl, the same way a request error does.
   */
  function onResponseEnd(body) {
    var newQuestions;
    try {
      newQuestions = JSON.parse(body);
    } catch (parseError) {
      onError(parseError);
      return;
    }
    if (!Array.isArray(newQuestions)) {
      onError(new Error('Unexpected response: expected a JSON array of questions'));
      return;
    }

    var added = questions.addNewQuestions(newQuestions);
    emptyAttempts = added ? 0 : emptyAttempts + 1;
    logProgress(emptyAttempts, added);

    if (hasReachedLimit()) {
      logLimitReached();
      logAvailableQuestions();
      return;
    }
    fetchQuestions();
  }

  /**
   * Collects the response body and passes it on when the response ends.
   * The buffer is local to each response so overlapping requests cannot mix
   * their chunks.
   */
  function onSuccess(res) {
    var body = '';
    res.setEncoding('utf8');
    res.on('data', function(chunk) {
      body += chunk;
    });
    res.on('end', function() {
      onResponseEnd(body);
    });
  }

  /**
   * Requests one question set from the endpoint. Each successful response
   * triggers the next request until hasReachedLimit() is true.
   */
  function fetchQuestions() {
    http.get(config.endpoint, onSuccess)
      .on('error', onError);
  }

  // expose public members
  questionsCrawler.prototype.fetchQuestions = fetchQuestions;

  return questionsCrawler;
}());
// Start crawling. A single call is enough: every completed response issues
// the next request itself until the crawler decides to stop.
new QuestionsCrawler(Questions).fetchQuestions();
| }()); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment