Last active
October 30, 2018 23:02
-
-
Save jdrew1303/c585ca8059a09daa3ebc2211cff05cae to your computer and use it in GitHub Desktop.
Text classification with neural networks in JavaScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const mimir = require('mimir'); | |
const brain = require('brain.js'); | |
/* A few utility helpers used by the example. */
/**
 * Builds a one-hot vector for a class label: an array of
 * `numberOfCategories` zeros with a single 1 at position `category`.
 *
 * @param {number} category - Zero-based index of the class to mark.
 * @param {number} numberOfCategories - Length of the resulting vector.
 * @returns {number[]} The one-hot vector.
 * @throws {RangeError} If `category` is not an integer in
 *   [0, numberOfCategories). Without this guard an out-of-range index would
 *   silently grow the array (or add a stray property for negative values)
 *   and produce a vector of the wrong shape.
 */
const vectorise = (category, numberOfCategories) => {
  if (!Number.isInteger(category) || category < 0 || category >= numberOfCategories) {
    throw new RangeError(
      `category must be an integer in [0, ${numberOfCategories}), got ${category}`
    );
  }
  const vector = new Array(numberOfCategories).fill(0);
  vector[category] = 1;
  return vector;
};
/**
 * Returns the index of the largest value in `array` (arg-max).
 *
 * Implemented as a single pass instead of `Math.max.apply(...)` followed by
 * `indexOf`: spreading a large array into call arguments can exceed the
 * engine's argument limit, and the two-step form scans the data twice.
 *
 * @param {ArrayLike<number>} array - Values to scan.
 * @returns {number} Index of the first occurrence of the maximum, or -1 when
 *   the input is empty or contains no comparable (non-NaN) value.
 */
const maxarg = (array) => {
  let bestIndex = -1;
  for (let i = 0; i < array.length; i += 1) {
    const value = array[i];
    // NaN never compares greater than anything; skip it so a single NaN
    // cannot hide the real maximum.
    if (Number.isNaN(value)) {
      continue;
    }
    // Strict ">" keeps the first occurrence on ties, like indexOf did.
    if (bestIndex === -1 || value > array[bestIndex]) {
      bestIndex = i;
    }
  }
  return bestIndex;
};
// training data

// Maps each class name to the zero-based position it occupies in the
// network's one-hot output vector. Frozen so the mapping cannot drift
// after the training data has been built from it.
const CLASSIFICATION_CLASSES = Object.freeze({
  HISTORY: 0,
  PROGRAMMING: 1,
  MUSIC: 2
});

// Class names indexed by their enum value: ['HISTORY', 'PROGRAMMING', 'MUSIC'].
// Each name is placed at the index the enum assigns to it, rather than
// relying on the declaration order of the keys happening to match the
// values, so CATEGORIES[i] is always the class whose output slot is i.
const CATEGORIES = [];
for (const [name, index] of Object.entries(CLASSIFICATION_CLASSES)) {
  CATEGORIES[index] = name;
}
// Sample sentences about history (Viking Age).
const HISTORY_TEXT = [
  "The end of the Viking-era in Norway is marked by the Battle of Stiklestad in 1030",
  "The end of the Viking Age is traditionally marked in England by the failed invasion attempted by the Norwegian king Harald III ",
  "The earliest date given for a Viking raid is 787 AD when, according to the Anglo-Saxon Chronicle, a group of men from Norway sailed to the Isle of Portland in Dorset"
];

// Sample sentences about programming languages.
const PROGRAMMING_TEXT = [
  "A programming language is a formal constructed language designed to communicate instructions to a machine, particularly a computer. Programming languages can be used to create programs to control the behavior of a machine or to express algorithms.",
  "Thousands of different programming languages have been created, mainly in the computer field, and many more still are being created every year.",
  "The description of a programming language is usually split into the two components of syntax (form) and semantics (meaning). Some languages are defined by a specification document (for example, the C programming language is specified by an ISO Standard), while other languages (such as Perl) have a dominant implementation that is treated as a reference"
];

// Sample sentences about music.
const MUSIC_TEXT = [
  "Classical music is art music produced or rooted in the traditions of Western music (both liturgical and secular)",
  "European music is largely distinguished from many other non-European and popular musical forms by its system of staff notation, in use since about the 16th century",
  "classical music has been noted for its development of highly sophisticated forms of instrumental music."
];

// Dictionary built by mimir from every sample sentence, in the order
// history, programming, music. It is passed to mimir.bow() for both the
// training sentences and the test sentences.
const DICTIONARY = mimir.dict(
  HISTORY_TEXT.concat(PROGRAMMING_TEXT, MUSIC_TEXT)
);
/**
 * Converts the sentences of one class into training samples.
 *
 * @param {string[]} texts - Sentences that all belong to `category`.
 * @param {number} category - Enum value of the class (index in the output vector).
 * @returns {{input: *, output: number[]}[]} One sample per sentence: `input`
 *   is whatever mimir.bow() produces for the sentence against DICTIONARY
 *   (presumably a bag-of-words vector), `output` is a fresh one-hot vector
 *   for the class.
 */
const toTrainingSamples = (texts, category) =>
  texts.map((text) => ({
    input: mimir.bow(text, DICTIONARY),
    output: vectorise(category, CATEGORIES.length)
  }));

// All labelled samples, in the order history, programming, music.
const TRAINING_DATA = [
  ...toTrainingSamples(HISTORY_TEXT, CLASSIFICATION_CLASSES.HISTORY),
  ...toTrainingSamples(PROGRAMMING_TEXT, CLASSIFICATION_CLASSES.PROGRAMMING),
  ...toTrainingSamples(MUSIC_TEXT, CLASSIFICATION_CLASSES.MUSIC)
];
// Unseen sentences, encoded with the same dictionary as the training data.
const test_history = mimir.bow(
  "The beginning of the Viking Age in the British Isles is, however, often given as 793.",
  DICTIONARY
);
const test_music = mimir.bow(
  "Baroque music is a style of Western art music composed from approximately 1600 to 1750",
  DICTIONARY
);

// Train a brain.js network with its default options on the labelled samples.
const net = new brain.NeuralNetwork();
net.train(TRAINING_DATA);

console.log('------------------- ANN (brain) ----------------------');
// For each test sentence, print the category whose output score is highest.
// Expected output: HISTORY, then MUSIC.
[test_history, test_music].forEach((sample) => {
  console.log(CATEGORIES[maxarg(net.run(sample))]);
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const mimir = require('mimir'); | |
const brain = require('brain.js'); | |
const {apply, compose, curry, flip, indexOf, keys, nth} = require('ramda'); | |
/**
 * Builds a one-hot vector for a class label: an array of
 * `numberOfCategories` zeros with a single 1 at position `category`.
 *
 * @param {number} category - Zero-based index of the class to mark.
 * @param {number} numberOfCategories - Length of the resulting vector.
 * @returns {number[]} The one-hot vector.
 * @throws {RangeError} If `category` is not an integer in
 *   [0, numberOfCategories). Without this guard an out-of-range index would
 *   silently grow the array (or add a stray property for negative values)
 *   and produce a vector of the wrong shape.
 */
const vectorise = (category, numberOfCategories) => {
  if (!Number.isInteger(category) || category < 0 || category >= numberOfCategories) {
    throw new RangeError(
      `category must be an integer in [0, ${numberOfCategories}), got ${category}`
    );
  }
  const vector = new Array(numberOfCategories).fill(0);
  vector[category] = 1;
  return vector;
};
/**
 * Picks the category whose score in `answerVector` is highest.
 *
 * Curried: may be called as `getCategoryFromAnswer(categories, vector)` or
 * partially applied as `getCategoryFromAnswer(categories)(vector)`.
 *
 * @param {string[]} categoriesEnum - Category names, indexed by output slot.
 * @param {ArrayLike<number>} answerVector - One score per output slot.
 * @returns {string|undefined} The name at the index of the first maximum
 *   score, or `undefined` when the vector is empty or holds no comparable
 *   (non-NaN) score. The previous composition looked up index -1 in that
 *   case, which Ramda's `nth` resolves to the LAST category, so an empty
 *   answer was reported as a confident prediction.
 */
const getCategoryFromAnswer = curry((categoriesEnum, answerVector) => {
  let bestIndex = -1;
  for (let i = 0; i < answerVector.length; i += 1) {
    const score = answerVector[i];
    // Skip NaN so one invalid score cannot decide the result.
    if (Number.isNaN(score)) {
      continue;
    }
    // Strict ">" keeps the first occurrence on ties.
    if (bestIndex === -1 || score > answerVector[bestIndex]) {
      bestIndex = i;
    }
  }
  return bestIndex === -1 ? undefined : categoriesEnum[bestIndex];
});
// training data

// Maps each class name to the zero-based position it occupies in the
// network's one-hot output vector. Frozen so the mapping cannot drift
// after the training data has been built from it.
const CATEGORIES_ENUM = Object.freeze({
  HISTORY: 0,
  PROGRAMMING: 1,
  MUSIC: 2
});

// Class names indexed by their enum value: ['HISTORY', 'PROGRAMMING', 'MUSIC'].
// Each name is placed at the index the enum assigns to it, rather than
// relying on the declaration order of the keys happening to match the
// values, so CATEGORIES[i] is always the class whose output slot is i.
const CATEGORIES = [];
keys(CATEGORIES_ENUM).forEach((name) => {
  CATEGORIES[CATEGORIES_ENUM[name]] = name;
});
// Sample sentences about history (Viking Age).
const HISTORY_TEXT = [
  "The end of the Viking-era in Norway is marked by the Battle of Stiklestad in 1030",
  "The end of the Viking Age is traditionally marked in England by the failed invasion attempted by the Norwegian king Harald III ",
  "The earliest date given for a Viking raid is 787 AD when, according to the Anglo-Saxon Chronicle, a group of men from Norway sailed to the Isle of Portland in Dorset"
];

// Sample sentences about programming languages.
const PROGRAMMING_TEXT = [
  "A programming language is a formal constructed language designed to communicate instructions to a machine, particularly a computer. Programming languages can be used to create programs to control the behavior of a machine or to express algorithms.",
  "Thousands of different programming languages have been created, mainly in the computer field, and many more still are being created every year.",
  "The description of a programming language is usually split into the two components of syntax (form) and semantics (meaning). Some languages are defined by a specification document (for example, the C programming language is specified by an ISO Standard), while other languages (such as Perl) have a dominant implementation that is treated as a reference"
];

// Sample sentences about music.
const MUSIC_TEXT = [
  "Classical music is art music produced or rooted in the traditions of Western music (both liturgical and secular)",
  "European music is largely distinguished from many other non-European and popular musical forms by its system of staff notation, in use since about the 16th century",
  "classical music has been noted for its development of highly sophisticated forms of instrumental music."
];

// Dictionary built by mimir from every sample sentence, in the order
// history, programming, music. It is passed to mimir.bow() for both the
// training sentences and the test sentences.
const DICTIONARY = mimir.dict(
  HISTORY_TEXT.concat(PROGRAMMING_TEXT, MUSIC_TEXT)
);
/**
 * Converts the sentences of one class into training samples.
 *
 * @param {string[]} texts - Sentences that all belong to `category`.
 * @param {number} category - Enum value of the class (index in the output vector).
 * @returns {{input: *, output: number[]}[]} One sample per sentence: `input`
 *   is whatever mimir.bow() produces for the sentence against DICTIONARY
 *   (presumably a bag-of-words vector), `output` is a fresh one-hot vector
 *   for the class.
 */
const toTrainingSamples = (texts, category) =>
  texts.map((text) => ({
    input: mimir.bow(text, DICTIONARY),
    output: vectorise(category, CATEGORIES.length)
  }));

// All labelled samples, in the order history, programming, music.
const TRAINING_DATA = [
  ...toTrainingSamples(HISTORY_TEXT, CATEGORIES_ENUM.HISTORY),
  ...toTrainingSamples(PROGRAMMING_TEXT, CATEGORIES_ENUM.PROGRAMMING),
  ...toTrainingSamples(MUSIC_TEXT, CATEGORIES_ENUM.MUSIC)
];
// Unseen sentences, encoded with the same dictionary as the training data.
const test_history = mimir.bow(
  "The beginning of the Viking Age in the British Isles is, however, often given as 793.",
  DICTIONARY
);
const test_music = mimir.bow(
  "Baroque music is a style of Western art music composed from approximately 1600 to 1750",
  DICTIONARY
);

// Train a brain.js network with its default options on the labelled samples.
const net = new brain.NeuralNetwork();
net.train(TRAINING_DATA);

console.log('------------------- ANN (brain) ----------------------');
// For each test sentence, print the category whose output score is highest.
// Expected output: HISTORY, then MUSIC.
[test_history, test_music].forEach((sample) => {
  console.log(getCategoryFromAnswer(CATEGORIES, net.run(sample)));
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment