Skip to content

Instantly share code, notes, and snippets.

@DavidBruant
Created July 10, 2015 15:51
Show Gist options
  • Save DavidBruant/264e4dd3e15ce68117a7 to your computer and use it in GitHub Desktop.
Save DavidBruant/264e4dd3e15ce68117a7 to your computer and use it in GitHub Desktop.
Playing with Natural
"use strict";
// --- Dependencies ----------------------------------------------------------
// The French stop-word list and French tokenizer are loaded directly from
// files inside the `natural` package rather than from its main export.
var natural = require('natural');
var stopwordsFr = require('natural/lib/natural/util/stopwords_fr.js').words;
var TokenizerFr = require('natural/lib/natural/tokenizers/aggressive_tokenizer_fr.js');

// --- English tools ---------------------------------------------------------
var tokenizer = new natural.WordTokenizer();
// Bound so the tokenizer method can be called as a plain function.
var tokenizeEn = tokenizer.tokenize.bind(tokenizer);
var stopwordsEn = natural.stopwords;
var stemEn = natural.PorterStemmer.stem;

// --- French tools ----------------------------------------------------------
var frTok = new TokenizerFr();
// Bound for the same reason as tokenizeEn.
var tokenizeFr = frTok.tokenize.bind(frTok);
var stemFr = natural.PorterStemmerFr.stem;
/**
 * Returns the tokens that are not present in the English stop-word list
 * (`stopwordsEn`). Matching is an exact, case-sensitive comparison.
 *
 * @param {string[]} tokens - tokens to filter
 * @returns {string[]} a new array holding only the tokens that were kept
 */
function filterStopWordsEn(tokens){
    function isNotStopWord(token){
        var position = stopwordsEn.indexOf(token);
        return position < 0;
    }

    return tokens.filter(isNotStopWord);
}
/**
 * Returns the tokens that are not present in the French stop-word list
 * (`stopwordsFr`). Matching is an exact, case-sensitive comparison.
 *
 * @param {string[]} tokens - tokens to filter
 * @returns {string[]} a new array holding only the tokens that were kept
 */
function filterStopWordsFr(tokens){
    function isNotStopWord(token){
        var position = stopwordsFr.indexOf(token);
        return position < 0;
    }

    return tokens.filter(isNotStopWord);
}
/**
 * Reduces an English sentence to a space-separated string of word stems.
 *
 * Steps: tokenize the sentence, drop stop words, stem every remaining token,
 * then join the stems with single spaces.
 *
 * Each token is stemmed individually. Handing the re-joined phrase to the
 * stemmer in one call makes it treat the phrase as a single word, so at most
 * the ending of the phrase gets reduced and the other words stay unstemmed.
 *
 * @param {string} str - sentence to process
 * @returns {string} stems of the kept tokens joined by ' ' ('' when nothing is kept)
 */
function toStemsEn(str){
    var keptTokens = filterStopWordsEn( tokenizeEn(str) );

    // Wrap the stemmer so it only ever receives the token, not the extra
    // (index, array) arguments that Array.prototype.map supplies.
    var stems = keptTokens.map(function(token){
        return stemEn(token);
    });

    return stems.join(' ');
}
/**
 * Reduces a French sentence to a space-separated string of word stems.
 *
 * Steps: tokenize the sentence, drop stop words, stem every remaining token,
 * then join the stems with single spaces.
 *
 * Each token is stemmed individually. Handing the re-joined phrase to the
 * stemmer in one call makes it treat the phrase as a single word, so the
 * individual words are not stemmed properly.
 *
 * @param {string} str - sentence to process
 * @returns {string} stems of the kept tokens joined by ' ' ('' when nothing is kept)
 */
function toStemsFr(str){
    var keptTokens = filterStopWordsFr( tokenizeFr(str) );

    // Wrap the stemmer so it only ever receives the token, not the extra
    // (index, array) arguments that Array.prototype.map supplies.
    var stems = keptTokens.map(function(token){
        return stemFr(token);
    });

    return stems.join(' ');
}
// Demo: run each pipeline on a sample sentence and print whatever it returns.
var sampleEn = "i am waking up to the sounds of chainsaws";
var sampleFr = "je vais bien, je vais bien, c'est une mélodie #nekfeu sans fin";

console.log( toStemsEn(sampleEn) );
console.log( toStemsFr(sampleFr) );
// Also print the raw French tokenizer output for the same sentence.
console.log( tokenizeFr(sampleFr) );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment