Skip to content

Instantly share code, notes, and snippets.

@alanrsoares
Last active August 29, 2015 14:05
Show Gist options
  • Select an option

  • Save alanrsoares/90641ddf082dcc78c821 to your computer and use it in GitHub Desktop.

Select an option

Save alanrsoares/90641ddf082dcc78c821 to your computer and use it in GitHub Desktop.
Simple crawler that fetches questions from the aa.co.nz quiz and stores them locally in a JSON database
#!/usr/bin/env node
(function() {
'use strict';
var _ = require('lodash');
/**
 * Questions — local store of quiz questions backed by the lowdb file `db.json`.
 *
 * Reads are served from an in-memory copy of the `questions` collection that
 * is loaded on first use; writes go to both that copy and the database.
 *
 * Public members: value, size, contains, addNewQuestion, addNewQuestions.
 */
var Questions = (function() {
  var low = require('lowdb');
  var db = low('db.json');
  // null means "not loaded yet"; an empty array is a valid loaded state,
  // so an empty store is not re-read from the database on every call.
  var questionsCache = null;

  low.mixin(require('underscore.db'));

  // Returns the lowdb chain for the `questions` collection.
  var questions = function() {
    return db('questions');
  };

  /**
   * Returns the cached question list, loading it from the database on the
   * first call.
   * @returns {Array} the in-memory question list.
   */
  function value() {
    if (questionsCache === null) {
      // [].concat yields a fresh array rather than the one returned by lowdb.
      questionsCache = [].concat(questions().value());
    }
    return questionsCache;
  }

  /**
   * @returns {number} how many questions are currently cached.
   */
  function size() {
    return value().length;
  }

  /**
   * Tells whether a matching question is already stored.
   * @param {Object} question - properties to look for; passed to lodash as
   *   an object predicate (property-matching shorthand).
   * @returns {boolean} true when at least one cached question matches.
   */
  function contains(question) {
    // _.some rather than its alias _.any, which lodash 4 removed.
    return _.some(value(), question);
  }

  /**
   * Stores one question in the cache and in the database. No duplicate check
   * is performed here; use addNewQuestions for that.
   * @param {Object} newQuestion - the question to store.
   */
  function addNewQuestion(newQuestion) {
    // Go through value() so the cache is loaded before the push; pushing onto
    // an unloaded cache would leave it holding only the new question and hide
    // everything already stored in the database.
    value().push(newQuestion);
    questions().insert(newQuestion);
  }

  /**
   * Stores every question from the list that is not already present.
   * @param {Array} newQuestions - candidate questions.
   * @returns {number} how many questions were actually added.
   */
  function addNewQuestions(newQuestions) {
    var added = 0;
    newQuestions.forEach(function(question) {
      if (contains(question)) {
        return;
      }
      addNewQuestion(question);
      added++;
    });
    return added;
  }

  return {
    value: value,
    size: size,
    addNewQuestion: addNewQuestion,
    addNewQuestions: addNewQuestions,
    contains: contains
  };
}());
/**
 * QuestionsCrawler — repeatedly requests question sets from the quiz endpoint
 * and hands them to a question store until a stop condition is met.
 *
 * Usage: `new QuestionsCrawler(store).fetchQuestions()`, where `store`
 * provides `size()` and `addNewQuestions(array)`.
 *
 * NOTE: the store reference, the response buffer and the empty-attempt
 * counter live in this closure rather than on the instance, so every
 * instance shares them.
 */
var QuestionsCrawler = (function() {
  // dependencies
  var http = require('http');

  // configuration
  var config = {
    endpoint: 'http://www.aa.co.nz/RoadCodeQuizController/getSet',
    // Stop after this many consecutive responses that added nothing new.
    maxEmptyAttempts: 50,
    // Stop once the store holds this many questions.
    // NOTE(review): presumably the total number of questions the server
    // offers — confirm.
    maxQuestions: 222
  };

  // fields
  var body = '';
  var emptyAttempts = 0;
  var store;

  // constructor
  /**
   * @param {Object} questionsRef - question store used by the crawler.
   */
  function questionsCrawler(questionsRef) {
    store = questionsRef;
  }

  // private members

  // True once either stop condition from the configuration is met.
  function hasReachedLimit() {
    return emptyAttempts >= config.maxEmptyAttempts ||
      store.size() >= config.maxQuestions;
  }

  // Clears the buffer that accumulates the current response.
  function resetBody() {
    body = '';
  }

  function logLimitReached() {
    console.log('No more questions available on the server');
  }

  // Logs how many questions were just added (when any) and the store total.
  function logAvailableQuestions(added) {
    if (added) {
      console.log('%d new question%s added.', added, added > 1 ? 's' : '');
    }
    console.log('%d questions available in the database.', store.size());
  }

  // Logs either the store totals (something was added) or the number of
  // consecutive empty attempts so far.
  function logProgress(attempts, added) {
    if (!attempts) {
      logAvailableQuestions(added);
      return;
    }
    console.log('Empty attempt #%d', attempts);
  }

  function onResponseData(chunk) {
    body += chunk;
  }

  // Handles a complete response: stores new questions, then either stops
  // (limit reached or unusable response) or requests the next set.
  function onResponseEnd() {
    var newQuestions;
    try {
      newQuestions = JSON.parse(body);
    } catch (error) {
      // A malformed response must not throw out of the 'end' handler and
      // crash the process; report it like a request error and stop.
      onError(error);
      return;
    }
    if (!Array.isArray(newQuestions)) {
      onError(new Error('Unexpected response: expected a JSON array of questions'));
      return;
    }

    var added = store.addNewQuestions(newQuestions);
    emptyAttempts = added ? 0 : emptyAttempts + 1;
    logProgress(emptyAttempts, added);

    if (hasReachedLimit()) {
      logLimitReached();
      logAvailableQuestions();
      return;
    }
    fetchQuestions();
  }

  function onSuccess(res) {
    res.setEncoding('utf8');
    res.on('data', onResponseData);
    res.on('end', onResponseEnd);
  }

  function onError(error) {
    console.log('Got error: ' + error.message);
  }

  /**
   * Requests one set of questions from the endpoint. When the response is
   * processed and no limit has been reached, the next request is issued
   * automatically, so a single call starts the whole crawl.
   */
  function fetchQuestions() {
    resetBody();
    http.get(config.endpoint, onSuccess)
      .on('error', onError);
  }

  // expose public members
  questionsCrawler.prototype.fetchQuestions = fetchQuestions;

  return questionsCrawler;
}());
// Entry point: bind a crawler to the local question store and start it.
var questionsCrawlerInstance = new QuestionsCrawler(Questions);
questionsCrawlerInstance.fetchQuestions();
}());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment