Created
March 29, 2012 11:16
-
-
Save lrvick/2236010 to your computer and use it in GitHub Desktop.
Naive Bayes sentiment classifier attempt in JavaScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * baez - a tiny, naive sentiment classifier.
 *
 * Public API (returned object):
 *   - train(samples): builds per-label token statistics from labelled text.
 *   - guess(sample):  scores a piece of text against every trained label.
 *   - stopwords:      array of lower-case words ignored by the tokenizer;
 *                     it is read through `baez.stopwords` on every call, so
 *                     callers may mutate or replace it.
 *
 * train() also stores its input on `baez.samples` and its result on
 * `baez.probdist`; guess() reads both.
 */
var baez = (function () {
  var stopwords = ['i', 'in', 'and', 'to', 'are', 'the', 'but', 'my', 'they', 'those', 'them', 'you', 'a'];

  // A token must consist purely of ASCII letters/digits; anything carrying
  // punctuation (e.g. "happy.") is discarded rather than cleaned up.
  var WORD_PATTERN = /^[a-zA-Z0-9]+$/;

  // Own-property test that cannot be fooled by inherited keys such as
  // "constructor", which is a perfectly valid English token.
  var hasOwn = Object.prototype.hasOwnProperty;

  /**
   * Splits a sample on single spaces and returns the lower-cased tokens that
   * are purely alphanumeric and are not stopwords.
   *
   * @param {string} sample - Text to tokenize.
   * @returns {string[]} Accepted tokens, in order of appearance.
   */
  function tokenize(sample) {
    var tokens = [];
    sample.split(' ').forEach(function (raw) {
      // Lower-case BEFORE the stopword lookup so "I" / "You" are filtered too.
      var token = raw.toLowerCase();
      if (WORD_PATTERN.test(token) && baez.stopwords.indexOf(token) === -1) {
        tokens.push(token);
      }
    });
    return tokens;
  }

  /**
   * Adds a relative frequency `p` (= c / total tokens of the label) to every
   * token entry. Mutates and returns `tokens`.
   *
   * @param {Object} tokens - Map label -> token -> {c: count}.
   * @param {Object} totals - Map label -> total number of tokens counted.
   * @returns {Object} The same `tokens` object, each entry now {c, p}.
   */
  function probdist(tokens, totals) {
    Object.keys(tokens).forEach(function (label) {
      var entries = tokens[label];
      Object.keys(entries).forEach(function (token) {
        entries[token].p = entries[token].c / totals[label];
      });
    });
    return tokens;
  }

  /**
   * Scores a sample against every trained label.
   *
   * The score of a label is the SUM of the trained `p` values of the sample's
   * tokens (unknown tokens contribute 0). Note this is an additive heuristic,
   * not a product of likelihoods.
   *
   * @param {string} sample - Text to classify.
   * @returns {Object} Map label -> numeric score.
   * @throws {TypeError} If called before train().
   */
  function guess(sample) {
    if (!baez.samples || !baez.probdist) {
      throw new TypeError('baez.guess: call baez.train(samples) before guessing');
    }
    var scores = {};
    var tokens = tokenize(sample);
    Object.keys(baez.samples).forEach(function (label) {
      var dist = baez.probdist[label];
      var score = 0;
      tokens.forEach(function (token) {
        if (hasOwn.call(dist, token)) {
          score += dist[token].p;
        }
      });
      scores[label] = score;
    });
    return scores;
  }

  /**
   * Counts token occurrences per label and derives their relative
   * frequencies. Stores `samples` on `baez.samples` and the result on
   * `baez.probdist`.
   *
   * @param {Object} samples - Map label -> array of sample strings.
   * @returns {Object} Map label -> token -> {c: count, p: count / label total}.
   */
  function train(samples) {
    baez.samples = samples;
    var tokens = {};
    var totals = {};
    Object.keys(samples).forEach(function (label) {
      var counts = {};
      var total = 0;
      samples[label].forEach(function (sample) {
        tokenize(sample).forEach(function (token) {
          if (!hasOwn.call(counts, token)) {
            counts[token] = { c: 0 };
          }
          counts[token].c += 1;
          total += 1;
        });
      });
      tokens[label] = counts;
      totals[label] = total;
    });
    baez.probdist = probdist(tokens, totals);
    return baez.probdist;
  }

  return {
    guess: guess,
    train: train,
    stopwords: stopwords
  };
}());
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<!--
  Demo page for baez.js: trains the classifier on a handful of labelled
  sentences and prints the neg/pos scores for each test sentence.
-->
<html>
<head>
<!-- The charset declaration comes first so it applies before any other content is parsed. -->
<meta charset="utf-8">
<title>baez - NaiveBayes Sentiment Classification Example</title>
<!-- Loaded over HTTPS so the page also works when served from an HTTPS origin. -->
<script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"></script>
<script type="text/javascript" src="baez.js"></script>
<script type="text/javascript">
// Labelled training sentences: label -> array of samples.
var train_samples = {
  'pos' : [
    'I love you mommy ! You are my amazing person',
    'I love you and you are a good person',
    'I win at everything and I want to love people',
    'good are things are heppening. gbu',
    'I am so rich',
    'I want to chase butterflies since they make me happy',
    'I want to hug bunnies',
    'You make me smile'
  ],
  'neg' : [
    'I fail at everything and I want to kill people',
    'sad are things are heppening. fml',
    'I hate you and you are a bad person',
    'I hate you mommy ! You are my terrible person',
    'I want to kill butterflies since they make me sad',
    'I want to hurt bunnies',
    'I am so poor',
    'You make me frown'
  ]
};

// Unlabelled sentences to score once training is done.
var test_samples = [
  'You are a terrible person and everything you do is bad',
  'I love you all and you make me happy',
  'I frown whenever I see you in a poor state of mind',
  'Finally getting rich from my ideas. They make me smile.',
  'My mommy is poor',
  'I love butterflies. Yay for happy',
  'Everything is fail today and I hate stuff'
];

$(document).ready(function(){
  var probdist = baez.train(train_samples);
  console.log(probdist);
  test_samples.forEach(function(sample){
    var scores = baez.guess(sample);
    // Build the paragraph from text nodes so sample text is never parsed as HTML.
    $('<p>')
      .text(sample)
      .append('<br>', document.createTextNode(' neg:' + scores.neg + ' pos:' + scores.pos))
      .appendTo('body');
  });
});
</script>
</head>
<body></body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment