Created
September 23, 2018 18:54
-
-
Save radi-cho/c0d8e8f64be73188e8c6a80cc20d3a07 to your computer and use it in GitHub Desktop.
A Bag of Words model built with TensorFlow.js (tfjs): how it works, how to train it, and how to use it for predictions.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Simple vocabulary of sentiment terms: the negative terms come first, then the
// positive ones. The order matters: element i of every vector returned by
// fitData is the count for vocabulary[i].
const vocabulary = ["bad", "slow", "ugly", "overrated", "expensive", "wrong", "good", "amazing", "fast", "kind", "cheap"];

/**
 * Encode a text as a bag-of-words count vector over `vocabulary`.
 *
 * The text is lower-cased and split on any run of whitespace (spaces, tabs,
 * newlines); every character that is not an ASCII letter is then stripped from
 * each token, so "Good," and "good" are counted as the same term.
 *
 * @param {string} string - Text to encode. null/undefined is treated as empty text.
 * @returns {number[]} Array of length `vocabulary.length`; element i is the
 *   number of times vocabulary[i] occurs in the text.
 */
const fitData = string => {
  const text = string == null ? "" : String(string);
  // Split on whitespace BEFORE stripping punctuation so that words separated
  // only by a tab or a line break are not fused into one token.
  const tokens = text
    .toLowerCase()
    .split(/\s+/)
    .map(token => token.replace(/[^a-z]+/g, ""));
  // A Map (rather than a plain object) keeps inherited keys such as
  // "constructor" from ever leaking into the counts.
  const counts = new Map();
  for (const token of tokens) {
    if (token !== "") {
      counts.set(token, (counts.get(token) || 0) + 1);
    }
  }
  return vocabulary.map(term => counts.get(term) || 0);
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Bag of words model: a vocabulary-sized count vector feeds a 2-unit dense
// layer, which feeds a single-unit dense output layer. No activation is
// configured on either layer.
const model = tf.sequential({
  layers: [
    tf.layers.dense({ units: 2, inputShape: [vocabulary.length] }),
    tf.layers.dense({ units: 1, inputShape: [2] }),
  ],
});
model.compile({ loss: "meanSquaredError", optimizer: "sgd" });

// Build plain nested arrays first: one count vector per comment for the
// inputs, and a one-element label ([1] or [0]) per comment for the targets.
// NOTE(review): the label is 1 only when `isPositive` is the string
// "positive" — confirm that matches the shape of the `comments` data.
const xs_data = comments.map(({ text }) => fitData(text));
const ys_data = comments.map(({ isPositive }) => [isPositive === "positive" ? 1 : 0]);

// Wrap the nested arrays in 2-D tensors for training.
const xs = tf.tensor2d(xs_data);
const ys = tf.tensor2d(ys_data);

// Train the model for 5 epochs.
await model.fit(xs, ys, { epochs: 5 });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment