Created
July 26, 2018 07:57
-
-
Save richard512/b5396d12626c6d731e17bd476b8648ee to your computer and use it in GitHub Desktop.
JavaScript Naive Bayes Classifier -- Web Browser Compatible
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Guarded definition: the class is installed on the global object only when no
// `Classifier` exists yet, so this script can be injected or re-run in the same
// page without redefining it. Explicit `globalThis` assignments are used (not
// bare assignments or top-level `const`/`let`) so the code works in strict mode
// and ES modules and stays re-runnable.
globalThis.needsClassifier = (typeof Classifier === 'undefined');
if (globalThis.needsClassifier) {
  /**
   * Word-frequency ("naive Bayes"-style) text classifier.
   *
   * Typical use: call `train` / `trainlist` with labelled text, then `guess`
   * with unlabelled text. `probabilities()` may be called explicitly; `guess`
   * also calls it when training data changed since the last computation.
   *
   * State:
   *  - dict:         token -> entry `{ word, [category]: { count, freq, prob } }`
   *  - categories:   category -> `{ docCount, tokenCount }`
   *  - wordList:     tokens in first-seen order
   *  - categoryList: categories in first-seen order
   *  - stale:        true when counts changed after the last `probabilities()`
   *
   * `dict`, `categories` and the word entries are prototype-less objects, so
   * tokens or categories such as "constructor" or "__proto__" are treated as
   * ordinary keys instead of resolving to inherited Object members.
   *
   * Caveat: each word entry stores the token under the key `word`, so a
   * category literally named "word" shares that key.
   */
  globalThis.Classifier = class Classifier {
    constructor() {
      this.dict = Object.create(null);
      this.categories = Object.create(null);
      this.wordList = [];
      this.categoryList = [];
      this.stale = false;
    }

    /**
     * @param {string} token
     * @returns {boolean} true when the token contains at least one `\w` character.
     */
    static validate(token) {
      return /\w+/.test(token);
    }

    /**
     * Split text on runs of non-word characters, lowercase each piece and keep
     * only the pieces accepted by `validate` (this drops empty strings).
     * @param {string} data
     * @returns {string[]}
     */
    static tokenize(data) {
      return data
        .split(/\W+/)
        .map((piece) => piece.toLowerCase())
        .filter((piece) => Classifier.validate(piece));
    }

    /**
     * Record one more document for `category`, creating the category on first use.
     * @param {string} category
     */
    registerDocument(category) {
      const known = this.categories[category];
      if (known === undefined) {
        this.categories[category] = {
          docCount: 1,
          tokenCount: 0,
        };
        this.categoryList.push(category);
      } else {
        known.docCount += 1;
      }
      this.stale = true;
    }

    /**
     * Count one occurrence of `token` in `category`.
     * The category must already be registered (`train` / `trainlist` do this);
     * otherwise the first statement throws a TypeError.
     * @param {string} token
     * @param {string} category
     */
    increment(token, category) {
      this.categories[category].tokenCount += 1;
      const word = this.dict[token];
      if (word === undefined) {
        const entry = Object.create(null);
        entry.word = token;
        entry[category] = { count: 1 };
        this.dict[token] = entry;
        this.wordList.push(token);
      } else if (word[category] === undefined) {
        word[category] = { count: 1 };
      } else {
        word[category].count += 1;
      }
      this.stale = true;
    }

    /**
     * Train on a single document.
     * @param {string} data      Text of the document.
     * @param {string} category  Label for the document.
     */
    train(data, category) {
      this.registerDocument(category);
      Classifier.tokenize(data).forEach((token) => {
        this.increment(token, category);
      });
    }

    /**
     * Train on a list of strings that together count as ONE document: the
     * category's `docCount` is incremented once, regardless of list length.
     * @param {string[]} datalist  Texts to tokenize.
     * @param {string} category    Label for the texts.
     */
    trainlist(datalist, category) {
      this.registerDocument(category);
      for (let i = 0; i < datalist.length; i++) {
        Classifier.tokenize(datalist[i]).forEach((token) => {
          this.increment(token, category);
        });
      }
    }

    /**
     * For every known word and category compute
     *   freq = count / docCount   and
     *   prob = freq / (sum of the word's freqs over all categories),
     * with `prob` clamped to [0.01, 0.99]. Categories in which a word was never
     * seen get an explicit `{ count: 0 }` record first.
     */
    probabilities() {
      this.wordList.forEach((key) => {
        const word = this.dict[key];
        // The normalising total is the same for every category of this word,
        // so it is accumulated once while the frequencies are computed.
        let total = 0;
        this.categoryList.forEach((category) => {
          if (word[category] === undefined) {
            word[category] = { count: 0 };
          }
          const stats = word[category];
          stats.freq = stats.count / this.categories[category].docCount;
          if (stats.freq) {
            total += stats.freq;
          }
        });
        this.categoryList.forEach((category) => {
          const stats = word[category];
          stats.prob = Math.max(0.01, Math.min(0.99, stats.freq / total));
        });
      });
      this.stale = false;
    }

    /**
     * Score `data` against every category.
     *
     * Each known token (repeats included) contributes its per-category `prob`;
     * unknown tokens are ignored. Scores are normalised so they sum to 1.
     * With no known tokens every category gets the same share.
     *
     * @param {string} data  Text to classify.
     * @returns {Object<string, {probability: number}>} one entry per category.
     */
    guess(data) {
      if (this.stale) {
        this.probabilities();
      }
      const words = Classifier.tokenize(data)
        .map((token) => this.dict[token])
        .filter((word) => word !== undefined);

      // Sum logarithms instead of multiplying probabilities: a long input
      // would otherwise underflow every product to 0 and yield 0 / 0 = NaN.
      const logScores = this.categoryList.map((category) =>
        words.reduce((acc, word) => acc + Math.log(word[category].prob), 0));
      const peak = logScores.reduce((best, score) => Math.max(best, score), -Infinity);
      // Shifting by the largest score makes the best category exp(0) = 1.
      const scaled = logScores.map((score) => Math.exp(score - peak));
      const total = scaled.reduce((acc, value) => acc + value, 0);

      // Object.fromEntries defines own data properties, so any category name
      // (even "__proto__") becomes a regular key of the result.
      return Object.fromEntries(
        this.categoryList.map((category, index) => [
          category,
          { probability: scaled[index] / total },
        ])
      );
    }
  };
}
// Demo: train a shared classifier on three tiny samples and print the
// per-category probabilities for a test sentence.
// The instance and the result are stored as explicit global properties (not
// bare assignments, which throw in strict mode / ES modules) so the script can
// be re-run in the same page; a re-run reuses the existing instance and adds
// the same training samples to it again.
if (typeof classifier === 'undefined') {
  globalThis.classifier = new Classifier();
}
classifier.trainlist(["happy"], "happy");
classifier.train("I am sad and I am very sad.", "sad");
classifier.train("I have mixed feelings.", "mixed");
classifier.probabilities();
globalThis.results = classifier.guess("Yesterday, I was very very happy, so happy.");
console.log(results);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment