Created
February 22, 2012 06:17
-
-
Save thisismattmiller/1881977 to your computer and use it in GitHub Desktop.
Node Express app.js for visualMOA.org
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * This is the Express app.js file.
 */
/**
 * Module dependencies.
 */
var express = require('express');
// NOTE(review): `routes` is not referenced anywhere else in the visible file;
// it is kept because other code may rely on the module being loaded.
var routes = require('./routes');
var underscore = require('underscore');

// The Express server instance. It is also assigned to `module.exports`, so
// code that requires this file receives the same server object.
var app = module.exports = express.createServer();
// Configuration

// Settings and middleware applied in every environment.
app.configure(function () {
  // Templates live in ./views and are rendered with Jade.
  app.set('views', __dirname + '/views');
  app.set('view engine', 'jade');

  app.use(express.bodyParser());
  app.use(express.methodOverride());

  // The Stylus middleware is registered before the static file handler
  // below; both work on the ./public directory.
  app.use(require('stylus').middleware({
    src: __dirname + '/public'
  }));

  // Application routes are consulted before static files.
  app.use(app.router);
  app.use(express.static(__dirname + '/public'));
});

// Development: error handler configured to dump exceptions and show stacks.
app.configure('development', function () {
  app.use(express.errorHandler({
    dumpExceptions: true,
    showStack: true
  }));
});

// Production: error handler with its default options.
app.configure('production', function () {
  app.use(express.errorHandler());
});
// Routes
//
// Every handler is wrapped in a small closure on purpose: the handler
// functions (`search`, `singleReq`, ...) are assigned with `var` further down
// this file, i.e. after these routes are registered. Passing them directly
// (e.g. `app.get('/search/:word', search)`) would register `undefined`.

// Landing page.
app.get('/', function (req, res) {
  res.render('index', {
    title: 'Home'
  });
});

// Index search and single n-gram lookup.
app.get('/search/:word', function (req, res) {
  search(req, res);
});
app.get('/ngram/:id', function (req, res) {
  singleReq(req, res);
});

// Author list and per-author article filter.
app.get('/authors/', function (req, res) {
  authorsAll(req, res);
});
app.get('/authors/filter/:author/:terms', function (req, res) {
  authorsFilter(req, res);
});

// OCR correction tool.
// NOTE(review): /ocrToolUpdate modifies data but is exposed over GET; the
// route is left unchanged so existing clients keep working.
app.get('/ocrToolNext/:skip', function (req, res) {
  ocrToolNext(req, res);
});
app.get('/ocrToolUpdate/:word/:review/:incorrect/:correct', function (req, res) {
  ocrToolUpdate(req, res);
});

// Article tree, connected-word filter, page text and word context.
app.get('/articles/:term', function (req, res) {
  articlesReq2(req, res);
});
app.get('/filter/:year/:term/:form/:noun', function (req, res) {
  filter(req, res);
});
app.get('/page/:vol/:page', function (req, res) {
  pageReq(req, res);
});
app.get('/wordContext/:word', function (req, res) {
  wordsGetContex(req, res);
});
app.get('/text/:year/:first_term/:first_form/:second_term/:second_form', function (req, res) {
  textReq(req, res);
});
// Start the server. The PORT environment variable overrides the default of
// 3000; anything that does not parse to a non-zero number falls back to 3000.
var port = parseInt(process.env.PORT, 10) || 3000;

// The address is only read inside the listen callback: `app.address()` is not
// guaranteed to be available until the server is actually listening.
app.listen(port, function () {
  console.log("Express server listening on port %d in %s mode", app.address().port, app.settings.env);
});
//Returns the n-gram data for a single index item | |
var singleReq = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('index', function (err, coll) { | |
var mongo = require('mongodb'); | |
var BSON = mongo.BSONPure; | |
var o_id = new BSON.ObjectID(req.params.id); | |
var terms = { | |
_id: o_id | |
}; | |
coll.find(terms, function (err, cursor) { | |
cursor.toArray(function (err, items) { | |
jsonString = JSON.stringify(items); | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
res.write(jsonString); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
//updates the words record from the OCR correct tool | |
var ocrToolUpdate = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('words', function (err, coll) { | |
coll.update({ | |
_id: req.params.word | |
}, { | |
$set: { | |
review: parseInt(req.params.review), | |
incorrect: req.params.incorrect, | |
correct: req.params.correct | |
} | |
}); | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
res.write(''); | |
res.end(); | |
}); | |
}); | |
} | |
//Gets the next OCR word to check | |
var ocrToolNext = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('words', function (err, coll) { | |
coll.find({ | |
review: 1 | |
}, function (err, cursor) { | |
cursor.sort({ | |
freq: -1 | |
}); | |
if (parseInt(req.params.skip) != 0) { | |
cursor.skip(parseInt(req.params.skip)); | |
} | |
cursor.limit(1); | |
cursor.toArray(function (err, items) { | |
jsonString = JSON.stringify(items); | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
res.write(jsonString); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
//Returns the sentence the word occured in from the words collection | |
var wordsGetContex = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('words', function (err, coll) { | |
coll.find({ | |
_id: req.params.word | |
}, function (err, cursor) { | |
cursor.toArray(function (err, items) { | |
jsonString = JSON.stringify(items); | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
res.write(jsonString); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
//returns the articles a author wrote | |
var authorsFilter = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('articles', function (err, coll) { | |
var query; | |
if (req.params.terms.split(',').length == 1) { | |
query = { | |
author: req.params.author, | |
terms: req.params.terms | |
}; | |
} else { | |
var terms = req.params.terms.split(','); | |
var termsAry = []; | |
for (term in terms) { | |
termsAry.push({ | |
terms: terms[term] | |
}) | |
} | |
query = { | |
$and: [{ | |
author: req.params.author | |
}, { | |
$or: termsAry | |
}] | |
}; | |
} | |
coll.find(query, function (err, cursor) { | |
cursor.toArray(function (err, items) { | |
jsonString = JSON.stringify(items); | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
res.write(jsonString); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
//loads the author list for the network tool | |
var authorsAll = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('allAuthors', function (err, coll) { | |
coll.find({}, function (err, cursor) { | |
cursor.sort({ | |
value: -1 | |
}); | |
cursor.toArray(function (err, items) { | |
jsonString = JSON.stringify(items); | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
res.write(jsonString); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
//returns the articles for the article tree | |
var articlesReq2 = function (req, res) { | |
var Db = require('mongodb').Db, | |
Connection = require('mongodb').Connection, | |
Server = require('mongodb').Server; | |
var obj = { | |
name: req.params.term, | |
children: [] | |
}; | |
var blackList = [req.params.term]; | |
var db = new Db('moa', new Server('localhost', 27017, {}), { | |
native_parser: true | |
}); | |
db.open(function (err, db) { | |
db.collection('articles', function (err, collection) { | |
collection.find({ | |
terms: req.params.term | |
}, function (err, cursor) { | |
cursor.limit(1500); | |
//first level | |
cursor.toArray(function (err, docs) { | |
console.log(docs.length); | |
var count = 0; | |
docs.forEach(function (doc) { | |
var allTerms = "" | |
for (aT in doc.terms) { | |
allTerms = allTerms + doc.terms[aT] + ","; | |
} | |
allTerms = allTerms.substring(0, allTerms.length - 1); | |
count = count + 1; | |
var article = { | |
id: doc._id + "_" + count, | |
year: doc.year, | |
size: 1, | |
name: doc.title, | |
sentence: doc.sentence, | |
terms: allTerms, | |
journal: doc.journal | |
}; | |
for (aTerm in doc.terms) { | |
if (doc.terms[aTerm] != req.params.term) { | |
var found = false; | |
for (aChild in obj.children) { | |
if (obj.children[aChild].name == doc.terms[aTerm]) { | |
obj.children[aChild].size = obj.children[aChild].size + 1; | |
found = true; | |
if (obj.children[aChild].children.length < 50) { | |
obj.children[aChild].children.push(article); | |
} | |
} | |
} | |
if (found == false) { | |
obj.children.push({ | |
name: doc.terms[aTerm], | |
children: [article], | |
size: 1 | |
}); | |
//blackList.push(doc.terms[aTerm]); | |
} | |
} | |
} | |
}); | |
obj.children.sort(function (a, b) { | |
return b.size - a.size; | |
}) | |
if (obj.children.length > 200) { | |
obj.children.splice(201, (obj.children.length - 201)); | |
} | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
res.write(JSON.stringify(obj)); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
//seaches the index for a term and returns matches | |
var search = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('index', function (err, coll) { | |
regexp = new RegExp(req.params.word, 'i'); | |
terms = { | |
word: regexp | |
}; | |
coll.find(terms, function (err, cursor) { | |
cursor.sort({ | |
total: -1 | |
}); | |
cursor.toArray(function (err, items) { | |
jsonString = ''; | |
for (i = 0; i < items.length; i++) { | |
if (items[i]['form'] == 'np') { | |
jsonString = jsonString + '{"name":"' + items[i]['word'].capitalize() + '","id":"' + items[i]['_id'] + '","size":"' + items[i]['total'] + '","type":"np"},'; | |
} else { | |
jsonString = jsonString + '{"name":"' + items[i]['word'] + '","id":"' + items[i]['_id'] + '","size":"' + items[i]['total'] + '","type":"nn"},'; | |
} | |
} | |
jsonString = jsonString.substring(0, jsonString.length - 1); | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
res.write('{"children":[' + jsonString + ']}'); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
//gets the text for a requested page | |
var pageReq = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('journals', function (err, coll) { | |
var vol = req.params.vol.replace(/\|/g, "/"); | |
coll.find({ | |
"text_meta_decls": vol, | |
"text_meta_page": "IMG" + req.params.page | |
}, function (err, cursor) { | |
cursor.sort({ | |
"text_meta_seq": 1 | |
}); | |
cursor.toArray(function (err, items) { | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
jsonString = ''; | |
lastTitle = ''; | |
for (i = 0; i < items.length; i++) { | |
var useTitle = (lastTitle == items[i]['article_title']) ? "" : items[i]['article_title']; | |
jsonString = jsonString + '{"title":' + JSON.stringify(useTitle) + ',"sentence":' + JSON.stringify(items[i]['sentence']) + "},"; | |
lastTitle = items[i]['article_title']; | |
} | |
jsonString = jsonString.substring(0, jsonString.length - 1); | |
res.write('{"results":[' + jsonString + ']}'); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
var textReq = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('journals', function (err, coll) { | |
if (req.params.first_form == 'np') { | |
var first_form = { | |
propers: req.params.first_term | |
}; | |
} else if (req.params.first_form == 'nn') { | |
var first_form = { | |
nouns: req.params.first_term | |
}; | |
} | |
if (req.params.second_form == 'np') { | |
var second_form = { | |
propers: req.params.second_term | |
}; | |
} else if (req.params.second_form == 'nn') { | |
var second_form = { | |
nouns: req.params.second_term | |
}; | |
} else if (req.params.second_form == 'vb') { | |
var second_form = { | |
verbs: req.params.second_term | |
}; | |
} | |
coll.find({ | |
$and: [{ | |
"year": parseInt(req.params.year) | |
}, | |
first_form, second_form] | |
}, function (err, cursor) { | |
cursor.toArray(function (err, items) { | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
jsonString = ''; | |
for (i = 0; i < items.length; i++) { | |
jsonString = jsonString + '{"title":' + JSON.stringify(items[i]['article_title']) + ',"sentence":' + JSON.stringify(items[i]['sentence']) + ',"image":"' + items[i]['text_meta_page'] + '","date":"' + items[i]['date'] + '","page":"' + items[i]['text_meta_decls'] + '"},'; | |
} | |
jsonString = jsonString.substring(0, jsonString.length - 1); | |
res.write('{"results":[' + jsonString + ']}'); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
//grab connected words | |
var filter = function (req, res) { | |
require('mongodb').connect('mongodb://localhost:27017/moa', function (err, conn) { | |
conn.collection('journals', function (err, coll) { | |
console.log('Running filter request - >' + req.params.term + ' - > ' + req.params.year) | |
var proper = {}; | |
if (req.params.form == 'np') { | |
var useIndex = 'propers'; | |
} else if (req.params.form == 'nn') { | |
var useIndex = 'nouns'; | |
} else { | |
var useIndex = 'verbs'; | |
} | |
if (req.params.noun == 'np') { | |
var form = { | |
propers: req.params.term | |
}; | |
} else { | |
var form = { | |
nouns: req.params.term | |
}; | |
} | |
coll.find({ | |
$and: [{ | |
"year": parseInt(req.params.year) | |
}, | |
form] | |
}, function (err, cursor) { | |
cursor.toArray(function (err, items) { | |
res.writeHead(200, { | |
'Content-Type': 'application/json' | |
}); | |
console.log('Found ' + items.length + ' items.'); | |
if (items.length == 0) { | |
res.write('[]'); | |
res.end(); | |
return; | |
} | |
for (i = 0; i < items.length; i++) { | |
for (x = 0; x < items[i][useIndex].length; x++) { | |
//the var might not be set yet | |
if (underscore.isUndefined(proper[items[i][useIndex][x]])) { | |
proper[items[i][useIndex][x]] = { | |
term: items[i][useIndex][x], | |
count: 0 | |
}; | |
} | |
proper[items[i][useIndex][x]].count = proper[items[i][useIndex][x]].count + 1; | |
} | |
} | |
proper = underscore.sortBy(proper, function (num) { | |
return num.count; | |
}); | |
proper = proper.reverse(); | |
if (proper.length > 10) { | |
//calculate the threashhold of items to return so we dont get like 1000 count:1 | |
//right now it is set to the top : | |
countThreashold = underscore.max(proper, function (p) { | |
return p.count; | |
}).count * 0.09; | |
//ehhhh | |
countThreashold = 2; | |
} | |
if (proper.length > 200) { | |
countThreashold = 3; | |
} | |
if (proper.length > 400) { | |
countThreashold = 4; | |
} | |
if (proper.length > 500) { | |
countThreashold = 5; | |
} | |
if (proper.length < 12) { | |
countThreashold = 0; | |
} | |
console.log('countThreashold:' + countThreashold); | |
jsonString = ''; | |
for (i = 0; i < proper.length; i++) { | |
if (proper[i].count < countThreashold) { | |
console.log('break!'); | |
break; | |
} | |
if (proper[i].term != req.params.term) { | |
if (proper[i].term.length > 2) { | |
jsonString = jsonString + '{"name":"' + proper[i].term.capitalize() + '","size":"' + proper[i].count + '"},'; | |
} | |
} | |
} | |
jsonString = jsonString.substring(0, jsonString.length - 1); | |
res.write('{"children":[' + jsonString + ']}'); | |
res.end(); | |
}); | |
}); | |
}); | |
}); | |
} | |
/**
 * Comparator for `Array.prototype.sort` that orders numbers from largest to
 * smallest.
 *
 * NOTE(review): not referenced anywhere else in the visible file.
 *
 * @param {number} a first value
 * @param {number} b second value
 * @returns {number} positive when `b` is larger, negative when `a` is
 *                   larger, 0 when they are equal
 */
function sortNumber(a, b) {
  return b - a;
}
/**
 * Returns a copy of the string in which the first character of every
 * whitespace-separated word is upper-cased; all other characters are left
 * untouched.
 *
 * The first character is matched with `\S` rather than `[a-z]` so that
 * non-ASCII initials (e.g. "émile") are capitalized as well; characters
 * without an upper-case form are returned unchanged by `toUpperCase`.
 *
 * @returns {string} the capitalized string
 */
String.prototype.capitalize = function () {
  return this.replace(/(^|\s)(\S)/g, function (match, lead, initial) {
    return lead + initial.toUpperCase();
  });
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment