Last active
December 14, 2015 03:28
-
-
Save jbowles/5020809 to your computer and use it in GitHub Desktop.
Playing around with node
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*jshint node:true*/ | |
"use strict"; | |
// SEND REQUEST TO CLASSIFY TEXT | |
var http = require('http'), | |
fs = require('fs'), | |
natural = require('natural'), | |
classifier = new natural.BayesClassifier(); | |
/**
 * Recursively flatten an arbitrarily nested array into a single-level array.
 *
 * Elements that are not arrays (numbers, strings, objects, null, ...) are
 * copied through unchanged; elements that are arrays are flattened in place,
 * to any depth. The input array is not modified.
 *
 * @param {Array} arr - the (possibly nested) array to flatten
 * @returns {Array} a new array containing every non-array leaf, in order
 */
function flatten(arr) {
  return arr.reduce(function (flat, item) {
    // Decide per element whether to recurse. Probing the element with
    // `.some(...)` would throw a TypeError for any non-array element.
    if (Array.isArray(item)) {
      return flat.concat(flatten(item));
    }
    // Wrap the leaf so concat appends it as a single element.
    return flat.concat([item]);
  }, []);
}
/**
 * Asynchronously read a file and split its contents into lines.
 *
 * The read is non-blocking: the returned array is EMPTY when this function
 * returns and receives a single element (the array of lines) once the read
 * completes. Pass `done` to be told when that has happened.
 *
 * @param {string} file_path - path of the file to read
 * @param {function(Error=, Array=)} [done] - optional Node-style callback,
 *   called with `(err)` on failure or `(null, array_string)` on success
 * @returns {Array} the array that will hold `[lines]` after the read finishes
 * @throws inside the read callback if the read fails and no `done` was given
 */
function file_string(file_path, done) {
  var array_string = [];
  var has_callback = typeof done === 'function';
  fs.readFile(file_path, function (err, data) {
    if (err) {
      if (has_callback) {
        return done(err);
      }
      // No callback supplied: keep the historical behaviour of throwing.
      throw err;
    }
    array_string.push(data.toString().split("\n"));
    // Log only once the contents are actually available.
    console.log("Finished read " + file_path + " to string");
    if (has_callback) {
      done(null, array_string);
    }
  });
  return array_string;
}
/**
 * Read a file synchronously and return its lines wrapped in an outer array.
 *
 * @param {string} file_path - path of the file to read
 * @returns {Array} a one-element array whose only element is the array of
 *   lines obtained by splitting the file contents on "\n"
 */
function sync_file(file_path) {
  var contents = fs.readFileSync(file_path).toString();
  var lines = contents.split("\n");
  return [lines];
}
/**
 * Stream a file from disk and split its text into word tokens using
 * `natural.WordTokenizer`.
 *
 * The read is asynchronous: the returned array is EMPTY when this function
 * returns and is filled when the stream ends. Pass `done` to be told when
 * the tokens are ready.
 *
 * @param {string} file_path - path of the file to read
 * @param {string} encoding - encoding handed to the stream's `setEncoding`
 * @param {function(Error=, Array=)} [done] - optional Node-style callback,
 *   called with `(err)` on a stream error or `(null, tokens)` at end of file
 * @returns {Array} the array that will hold the tokens after the stream ends
 * @throws inside the stream's error handler if reading fails and no `done`
 *   was given
 */
function tokenize_file(file_path, encoding, done) {
  var tokens = [];
  var text = '';
  var has_callback = typeof done === 'function';
  var file = fs.createReadStream(file_path);

  file.on('error', function (err) {
    console.log('Error ' + err);
    if (has_callback) {
      return done(err);
    }
    // No callback supplied: keep the historical behaviour of throwing.
    throw err;
  });

  file.setEncoding(encoding);

  // Buffer the chunks instead of tokenizing each one: a word that straddles
  // a chunk boundary would otherwise be emitted as two separate tokens.
  file.on('data', function (chunk) {
    text += chunk;
  });

  file.on('end', function () {
    var tokenizer = new natural.WordTokenizer();
    tokenizer.tokenize(text).forEach(function (token) {
      tokens.push(token);
    });
    console.log('Finished reading all of the data');
    if (has_callback) {
      done(null, tokens);
    }
  });

  return tokens;
}
// hand tooled examples for debugging | |
//classifier.addDocument('I like to cook food, especially Bechamel sauce.','FOOD or COOKING'); | |
//classifier.addDocument('Bayesian modelling and Boolean text classification with information retrieval.','INFORMATION RETRIEVAL'); | |
// Load the training files synchronously, then train the classifier.
// Each entry pairs a classification label with the file that supplies its
// training text.
var training_sets = [
  ['Cannot Determine', '/Users/jbowles/x/node/versuch/data/tiebreak.txt'],
  ['GENERIC', '/Users/jbowles/x/node/versuch/data/generic.txt'],
  ['SPORTS', '/Users/jbowles/x/node/versuch/data/sports.txt'],
  ['FOOD', '/Users/jbowles/x/node/versuch/data/food.txt'],
  ['ADVERTISING', '/Users/jbowles/x/node/versuch/data/advertising.txt'],
  ['COMPUTER', '/Users/jbowles/x/node/versuch/data/computer.txt'],
  ['ECONOMY', '/Users/jbowles/x/node/versuch/data/economy.txt'],
  ['HEALTH', '/Users/jbowles/x/node/versuch/data/health.txt']
];

// Read every file before touching the classifier, so an unreadable file
// aborts start-up before any document has been added.
var training_docs = training_sets.map(function (entry) {
  // sync_file wraps the lines in an outer array; element 0 is the line list.
  return { label: entry[0], lines: sync_file(entry[1])[0] };
});

training_docs.forEach(function (doc) {
  classifier.addDocument(doc.lines, doc.label);
});

classifier.train();
/* // hand tooled examples for debugging | |
console.log(classifier.classify('Classify this!')); | |
console.log(classifier.classify("I really dont think that was appropriate.")); | |
console.log(classifier.getClassifications('Classify this!')); | |
console.log(classifier.getClassifications("I really dont think that was appropriate.")); | |
*/ | |
// make a request: curl -d "hello my name is coolio" http://localhost:8080
//
// Classification server: reads the whole request body, classifies it once,
// and replies in plain text with every label/value pair, the winning label,
// and an echo of the request body.
http.createServer(function (request, response) {
  var body = '';

  response.writeHead(200, {"Content-Type" : "text/plain"});

  // Decode at the stream level so a multi-byte character split across two
  // chunks is reassembled instead of being decoded chunk by chunk.
  request.setEncoding('utf8');

  // Buffer the body: a request can arrive in several 'data' chunks, and
  // classifying each chunk separately yields several partial answers.
  // NOTE(review): the body size is unbounded; consider a limit if this is
  // ever exposed to untrusted clients.
  request.on('data', function (chunk) {
    body += chunk;
  });

  request.on('end', function () {
    // An empty body produces an empty response, as before.
    if (body.length > 0) {
      var container = {};
      container.clf = classifier.classify(body);
      container.get_clf = classifier.getClassifications(body);

      container.get_clf.forEach(function (element) {
        response.write("\nlabel: " + element.label + "\nvalue: " + element.value);
      });

      // Log the classification values with a timestamp appended, using a
      // copy so the classification array itself is left untouched.
      console.log(container.get_clf.concat(new Date()));

      response.write("\n\nWinner Tag: " + container.clf + "\nRequest: ");
      // Echo the request explicitly rather than piping request -> response,
      // so the output order is fixed and the response is ended exactly once.
      response.write(body);
    }
    response.end();
  });
}).listen(8080);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment