Skip to content

Instantly share code, notes, and snippets.

@thisismattmiller
Created February 22, 2012 06:17
Show Gist options
  • Save thisismattmiller/1881977 to your computer and use it in GitHub Desktop.
Save thisismattmiller/1881977 to your computer and use it in GitHub Desktop.
Node Express app.js for visualMOA.org
/**
* This is the Express app.js file.
*/
/**
* Module dependencies.
*/
var express = require('express'),
routes = require('./routes'),
underscore = require('underscore');
// Create the Express server and expose it as this module's export.
var app = module.exports = express.createServer();
// Configuration
// Settings and middleware shared by every environment. Middleware is
// registered in the order written below.
app.configure(function () {
// Templates are loaded from ./views and rendered with the jade engine.
app.set('views', __dirname + '/views');
app.set('view engine', 'jade');
app.use(express.bodyParser());
app.use(express.methodOverride());
// Stylus middleware is pointed at the same ./public directory that is
// served statically further down.
app.use(require('stylus').middleware({
src: __dirname + '/public'
}));
// The router is registered ahead of the static file middleware.
app.use(app.router);
app.use(express.static(__dirname + '/public'));
});
// Development only: error handler configured to dump exceptions and show stacks.
app.configure('development', function () {
app.use(express.errorHandler({
dumpExceptions: true,
showStack: true
}));
});
// Production only: error handler with its default options.
app.configure('production', function () {
app.use(express.errorHandler());
});
// Routes
// Every route below is a GET. Apart from '/', each one forwards (req, res) to
// a handler function through a small closure instead of passing the handler
// itself: the handlers are assigned with `var name = function ...` further
// down this file, so their variables are still undefined while these
// registrations run and can only be looked up once a request arrives.
// Renders the 'index' view with the title 'Home'.
app.get('/', function (req, res) {
res.render('index', {
title: 'Home'
});
});
// Handled by search(); reads req.params.word.
app.get('/search/:word', function (req, res) {
search(req, res);
});
// Handled by singleReq(); reads req.params.id.
app.get('/ngram/:id', function (req, res) {
singleReq(req, res);
});
// Handled by authorsAll(); takes no parameters.
app.get('/authors/', function (req, res) {
authorsAll(req, res);
});
// Handled by authorsFilter(); :terms may hold several comma-separated terms.
app.get('/authors/filter/:author/:terms', function (req, res) {
authorsFilter(req, res);
});
// Handled by ocrToolNext(); reads req.params.skip.
app.get('/ocrToolNext/:skip', function (req, res) {
ocrToolNext(req, res);
});
// Handled by ocrToolUpdate(). NOTE(review): this GET route writes to the
// 'words' collection.
app.get('/ocrToolUpdate/:word/:review/:incorrect/:correct', function (req, res) {
ocrToolUpdate(req, res);
});
// Handled by articlesReq2(); reads req.params.term.
app.get('/articles/:term', function (req, res) {
articlesReq2(req, res);
});
// Handled by filter(); reads year, term, form and noun.
app.get('/filter/:year/:term/:form/:noun', function (req, res) {
filter(req, res);
});
// Handled by pageReq(); reads vol and page.
app.get('/page/:vol/:page', function (req, res) {
pageReq(req, res);
});
// Handled by wordsGetContex(); reads req.params.word.
app.get('/wordContext/:word', function (req, res) {
wordsGetContex(req, res);
});
// Handled by textReq(); reads year plus two term/form pairs.
app.get('/text/:year/:first_term/:first_form/:second_term/:second_form', function (req, res) {
textReq(req, res);
});
// Start listening on port 3000. The startup message is logged from the listen
// callback because binding completes asynchronously: app.address() is only
// guaranteed to be populated once the server is actually listening.
app.listen(3000, function () {
  console.log("Express server listening on port %d in %s mode", app.address().port, app.settings.env);
});
//Returns the n-gram data for a single index item
var singleReq = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('index', function (err, coll) {
var mongo = require('mongodb');
var BSON = mongo.BSONPure;
var o_id = new BSON.ObjectID(req.params.id);
var terms = {
_id: o_id
};
coll.find(terms, function (err, cursor) {
cursor.toArray(function (err, items) {
jsonString = JSON.stringify(items);
res.writeHead(200, {
'Content-Type': 'application/json'
});
res.write(jsonString);
res.end();
});
});
});
});
}
//updates the words record from the OCR correct tool
var ocrToolUpdate = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('words', function (err, coll) {
coll.update({
_id: req.params.word
}, {
$set: {
review: parseInt(req.params.review),
incorrect: req.params.incorrect,
correct: req.params.correct
}
});
res.writeHead(200, {
'Content-Type': 'application/json'
});
res.write('');
res.end();
});
});
}
//Gets the next OCR word to check
var ocrToolNext = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('words', function (err, coll) {
coll.find({
review: 1
}, function (err, cursor) {
cursor.sort({
freq: -1
});
if (parseInt(req.params.skip) != 0) {
cursor.skip(parseInt(req.params.skip));
}
cursor.limit(1);
cursor.toArray(function (err, items) {
jsonString = JSON.stringify(items);
res.writeHead(200, {
'Content-Type': 'application/json'
});
res.write(jsonString);
res.end();
});
});
});
});
}
//Returns the sentence the word occured in from the words collection
var wordsGetContex = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('words', function (err, coll) {
coll.find({
_id: req.params.word
}, function (err, cursor) {
cursor.toArray(function (err, items) {
jsonString = JSON.stringify(items);
res.writeHead(200, {
'Content-Type': 'application/json'
});
res.write(jsonString);
res.end();
});
});
});
});
}
//returns the articles a author wrote
var authorsFilter = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('articles', function (err, coll) {
var query;
if (req.params.terms.split(',').length == 1) {
query = {
author: req.params.author,
terms: req.params.terms
};
} else {
var terms = req.params.terms.split(',');
var termsAry = [];
for (term in terms) {
termsAry.push({
terms: terms[term]
})
}
query = {
$and: [{
author: req.params.author
}, {
$or: termsAry
}]
};
}
coll.find(query, function (err, cursor) {
cursor.toArray(function (err, items) {
jsonString = JSON.stringify(items);
res.writeHead(200, {
'Content-Type': 'application/json'
});
res.write(jsonString);
res.end();
});
});
});
});
}
//loads the author list for the network tool
var authorsAll = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('allAuthors', function (err, coll) {
coll.find({}, function (err, cursor) {
cursor.sort({
value: -1
});
cursor.toArray(function (err, items) {
jsonString = JSON.stringify(items);
res.writeHead(200, {
'Content-Type': 'application/json'
});
res.write(jsonString);
res.end();
});
});
});
});
}
//returns the articles for the article tree
var articlesReq2 = function (req, res) {
var Db = require('mongodb').Db,
Connection = require('mongodb').Connection,
Server = require('mongodb').Server;
var obj = {
name: req.params.term,
children: []
};
var blackList = [req.params.term];
var db = new Db('moa', new Server('localhost', 27017, {}), {
native_parser: true
});
db.open(function (err, db) {
db.collection('articles', function (err, collection) {
collection.find({
terms: req.params.term
}, function (err, cursor) {
cursor.limit(1500);
//first level
cursor.toArray(function (err, docs) {
console.log(docs.length);
var count = 0;
docs.forEach(function (doc) {
var allTerms = ""
for (aT in doc.terms) {
allTerms = allTerms + doc.terms[aT] + ",";
}
allTerms = allTerms.substring(0, allTerms.length - 1);
count = count + 1;
var article = {
id: doc._id + "_" + count,
year: doc.year,
size: 1,
name: doc.title,
sentence: doc.sentence,
terms: allTerms,
journal: doc.journal
};
for (aTerm in doc.terms) {
if (doc.terms[aTerm] != req.params.term) {
var found = false;
for (aChild in obj.children) {
if (obj.children[aChild].name == doc.terms[aTerm]) {
obj.children[aChild].size = obj.children[aChild].size + 1;
found = true;
if (obj.children[aChild].children.length < 50) {
obj.children[aChild].children.push(article);
}
}
}
if (found == false) {
obj.children.push({
name: doc.terms[aTerm],
children: [article],
size: 1
});
//blackList.push(doc.terms[aTerm]);
}
}
}
});
obj.children.sort(function (a, b) {
return b.size - a.size;
})
if (obj.children.length > 200) {
obj.children.splice(201, (obj.children.length - 201));
}
res.writeHead(200, {
'Content-Type': 'application/json'
});
res.write(JSON.stringify(obj));
res.end();
});
});
});
});
}
//seaches the index for a term and returns matches
var search = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('index', function (err, coll) {
regexp = new RegExp(req.params.word, 'i');
terms = {
word: regexp
};
coll.find(terms, function (err, cursor) {
cursor.sort({
total: -1
});
cursor.toArray(function (err, items) {
jsonString = '';
for (i = 0; i < items.length; i++) {
if (items[i]['form'] == 'np') {
jsonString = jsonString + '{"name":"' + items[i]['word'].capitalize() + '","id":"' + items[i]['_id'] + '","size":"' + items[i]['total'] + '","type":"np"},';
} else {
jsonString = jsonString + '{"name":"' + items[i]['word'] + '","id":"' + items[i]['_id'] + '","size":"' + items[i]['total'] + '","type":"nn"},';
}
}
jsonString = jsonString.substring(0, jsonString.length - 1);
res.writeHead(200, {
'Content-Type': 'application/json'
});
res.write('{"children":[' + jsonString + ']}');
res.end();
});
});
});
});
}
//gets the text for a requested page
var pageReq = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('journals', function (err, coll) {
var vol = req.params.vol.replace(/\|/g, "/");
coll.find({
"text_meta_decls": vol,
"text_meta_page": "IMG" + req.params.page
}, function (err, cursor) {
cursor.sort({
"text_meta_seq": 1
});
cursor.toArray(function (err, items) {
res.writeHead(200, {
'Content-Type': 'application/json'
});
jsonString = '';
lastTitle = '';
for (i = 0; i < items.length; i++) {
var useTitle = (lastTitle == items[i]['article_title']) ? "" : items[i]['article_title'];
jsonString = jsonString + '{"title":' + JSON.stringify(useTitle) + ',"sentence":' + JSON.stringify(items[i]['sentence']) + "},";
lastTitle = items[i]['article_title'];
}
jsonString = jsonString.substring(0, jsonString.length - 1);
res.write('{"results":[' + jsonString + ']}');
res.end();
});
});
});
});
}
var textReq = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('journals', function (err, coll) {
if (req.params.first_form == 'np') {
var first_form = {
propers: req.params.first_term
};
} else if (req.params.first_form == 'nn') {
var first_form = {
nouns: req.params.first_term
};
}
if (req.params.second_form == 'np') {
var second_form = {
propers: req.params.second_term
};
} else if (req.params.second_form == 'nn') {
var second_form = {
nouns: req.params.second_term
};
} else if (req.params.second_form == 'vb') {
var second_form = {
verbs: req.params.second_term
};
}
coll.find({
$and: [{
"year": parseInt(req.params.year)
},
first_form, second_form]
}, function (err, cursor) {
cursor.toArray(function (err, items) {
res.writeHead(200, {
'Content-Type': 'application/json'
});
jsonString = '';
for (i = 0; i < items.length; i++) {
jsonString = jsonString + '{"title":' + JSON.stringify(items[i]['article_title']) + ',"sentence":' + JSON.stringify(items[i]['sentence']) + ',"image":"' + items[i]['text_meta_page'] + '","date":"' + items[i]['date'] + '","page":"' + items[i]['text_meta_decls'] + '"},';
}
jsonString = jsonString.substring(0, jsonString.length - 1);
res.write('{"results":[' + jsonString + ']}');
res.end();
});
});
});
});
}
//grab connected words
var filter = function (req, res) {
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) {
conn.collection('journals', function (err, coll) {
console.log('Running filter request - >' + req.params.term + ' - > ' + req.params.year)
var proper = {};
if (req.params.form == 'np') {
var useIndex = 'propers';
} else if (req.params.form == 'nn') {
var useIndex = 'nouns';
} else {
var useIndex = 'verbs';
}
if (req.params.noun == 'np') {
var form = {
propers: req.params.term
};
} else {
var form = {
nouns: req.params.term
};
}
coll.find({
$and: [{
"year": parseInt(req.params.year)
},
form]
}, function (err, cursor) {
cursor.toArray(function (err, items) {
res.writeHead(200, {
'Content-Type': 'application/json'
});
console.log('Found ' + items.length + ' items.');
if (items.length == 0) {
res.write('[]');
res.end();
return;
}
for (i = 0; i < items.length; i++) {
for (x = 0; x < items[i][useIndex].length; x++) {
//the var might not be set yet
if (underscore.isUndefined(proper[items[i][useIndex][x]])) {
proper[items[i][useIndex][x]] = {
term: items[i][useIndex][x],
count: 0
};
}
proper[items[i][useIndex][x]].count = proper[items[i][useIndex][x]].count + 1;
}
}
proper = underscore.sortBy(proper, function (num) {
return num.count;
});
proper = proper.reverse();
if (proper.length > 10) {
//calculate the threashhold of items to return so we dont get like 1000 count:1
//right now it is set to the top :
countThreashold = underscore.max(proper, function (p) {
return p.count;
}).count * 0.09;
//ehhhh
countThreashold = 2;
}
if (proper.length > 200) {
countThreashold = 3;
}
if (proper.length > 400) {
countThreashold = 4;
}
if (proper.length > 500) {
countThreashold = 5;
}
if (proper.length < 12) {
countThreashold = 0;
}
console.log('countThreashold:' + countThreashold);
jsonString = '';
for (i = 0; i < proper.length; i++) {
if (proper[i].count < countThreashold) {
console.log('break!');
break;
}
if (proper[i].term != req.params.term) {
if (proper[i].term.length > 2) {
jsonString = jsonString + '{"name":"' + proper[i].term.capitalize() + '","size":"' + proper[i].count + '"},';
}
}
}
jsonString = jsonString.substring(0, jsonString.length - 1);
res.write('{"children":[' + jsonString + ']}');
res.end();
});
});
});
});
}
/**
 * Comparator for Array.prototype.sort that orders numbers from largest to
 * smallest.
 *
 * @param {number} a First value.
 * @param {number} b Second value.
 * @returns {number} Positive when `b` is larger than `a`, negative when it is
 *     smaller, and 0 when both are equal.
 */
function sortNumber(a, b) {
  var difference = b - a;
  return difference;
}
/**
 * Returns a copy of the string in which every lower-case ASCII letter (a-z)
 * that starts the string or directly follows a whitespace character is
 * upper-cased. All other characters are left as they are.
 *
 * @returns {string} The capitalized copy.
 */
String.prototype.capitalize = function () {
  // Group 1 is the boundary (start of string or one whitespace character),
  // group 2 is the letter to upper-case.
  var wordStart = /(^|\s)([a-z])/g;
  return this.replace(wordStart, function (match, boundary, letter) {
    var upper = letter.toUpperCase();
    return boundary + upper;
  });
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment