Last active
August 29, 2015 14:03
-
-
Save Peleg/f0c755488b8fc9589989 to your computer and use it in GitHub Desktop.
T9 Dictionary for node.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Usage:
//   var T9Dictionary = require('./t9_dictionary');
//   var dict = new T9Dictionary(<path to text file>);
//   // The file is read asynchronously; once 'Dictionary loaded successfully' has been logged:
//   dict.suggest(4663); // => [ 'good', 'gone', 'home', 'hood', 'hoof' ]
module.exports = (function () { | |
// Modules | |
var fs = require('fs'); | |
// Module-level state shared by every T9Dictionary instance.
//
// _dictionary maps a phone spelling (e.g. '4663') to its word buckets, which
// loadDictionary() fills in. It is created without a prototype so that keys
// such as 'constructor' or 'toString' never resolve to members inherited from
// Object.prototype (with a plain {} a lookup of 'constructor' returned the
// Object function, and suggest() turned that into [undefined]).
var _dictionary = Object.create(null),
    // Digit that each lowercase letter is typed with on a phone keypad.
    _phoneLetters = {
      a: 2, b: 2, c: 2, d: 3, e: 3, f: 3, g: 4, h: 4, i: 4,
      j: 5, k: 5, l: 5, m: 6, n: 6, o: 6, p: 7, q: 7, r: 7,
      s: 7, t: 8, u: 8, v: 8, w: 9, x: 9, y: 9, z: 9
    };
// Helper Functions:

// Phone spelling of a word ('home' => '4663').
//
// word:    candidate word; only strings made of the lowercase letters a-z
//          are converted.
// returns: the digit string for the word, or undefined when `word` is not a
//          string or contains anything other than a-z (uppercase letters,
//          digits, punctuation, ...). An empty string maps to an empty
//          string, which callers can treat as "no spelling" because it is
//          falsy.
function phoneSpelling (word) {
  // Non-strings are rejected up front: values such as undefined or null
  // would otherwise slip through the regex test (they stringify to letters
  // only) and then throw on .replace().
  if (typeof word !== 'string' || /[^a-z]/.test(word)) return;
  return word.replace(/[a-z]/g, function (c) { return _phoneLetters[c]; });
}
// Creates a dictionary and, when a file path is given, starts loading it.
//
// dictFile: optional path of a text file; when truthy it is handed to
//           this.loadDictionary(). When omitted, nothing is loaded.
//
// Calling T9Dictionary(...) without `new` still yields an instance instead of
// failing on `this.loadDictionary`.
function T9Dictionary (dictFile) {
  if (!(this instanceof T9Dictionary)) return new T9Dictionary(dictFile);
  if (dictFile) this.loadDictionary(dictFile);
}
// Running word counts: { '4663': { home: 3, <word>: <count> } }.
// Kept apart from the ranked arrays stored in _dictionary so that loading a
// further file adds to the counts instead of corrupting the ranking. The maps
// have no prototype, so words such as 'constructor' are counted like any
// other word.
var _wordCounts = Object.create(null);

// Own-property test that also works on objects created without a prototype.
function hasOwnKey (obj, key) {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

// Adds every convertible word of `content` to _wordCounts and returns the
// phone spellings whose counts changed.
function countWords (content) {
  var touched = Object.create(null),
      words = content.toLowerCase().split(/\s+/);

  for (var i = 0; i < words.length; i++) {
    var word = words[i],
        phoneWord = phoneSpelling(word);

    // phoneSpelling() yields a falsy value for anything it cannot convert
    if (!phoneWord) continue;

    var counts = _wordCounts[phoneWord] ||
                 (_wordCounts[phoneWord] = Object.create(null));
    counts[word] = (counts[word] || 0) + 1;
    touched[phoneWord] = true;
  }

  return Object.keys(touched);
}

// Turns the counts of one phone spelling into buckets of words ordered by
// ascending count: [ [<words seen once>], ..., [<most frequent words>] ].
function rankWords (phoneWord) {
  var counts = _wordCounts[phoneWord],
      buckets = [];

  for (var word in counts) {
    var n = counts[word];
    (buckets[n] = buckets[n] || []).push(word);
  }

  // `buckets` is sparse (indexed by count); filter() skips the holes and
  // keeps the remaining buckets in index order.
  return buckets.filter(function () { return true; });
}

T9Dictionary.prototype = {

  // Reinstates overwritten constructor
  constructor: T9Dictionary,

  // Reads `file` asynchronously and adds its words to the shared dictionary.
  //
  // file:     path of a UTF-8 text file; it is lowercased and split on
  //           whitespace, and only words accepted by phoneSpelling() are kept.
  // callback: optional function (err). Called with null once the words are
  //           available to suggest(), or with the read error. Without a
  //           callback a read error is thrown from the read callback, as
  //           before.
  //
  // Counts accumulate across calls, so several files can be loaded.
  loadDictionary: function (file, callback) {
    var done = typeof callback === 'function' ? callback : null;

    fs.readFile(file, 'utf-8', function (err, content) {
      if (err) {
        if (done) return done(err);
        throw err;
      }

      // Only spellings that gained words need their ranking rebuilt
      var touched = countWords(content);
      for (var i = 0; i < touched.length; i++) {
        _dictionary[touched[i]] = rankWords(touched[i]);
      }

      console.log('Dictionary loaded successfully');
      if (done) done(null);
    });
  },

  // Suggests up to <count> words for <phoneNums>, most frequent first.
  //
  // phoneNums: phone spelling as a number or digit string (4663 or '4663').
  // count:     maximum number of words; defaults to 5 when omitted. A count
  //            of 0 returns an empty array.
  // returns:   array of words; empty when nothing is known for phoneNums.
  suggest: function (phoneNums, count) {
    var limit = (count === undefined || count === null) ? 5 : count,
        // own-property lookup: inherited keys must not be mistaken for entries
        buckets = hasOwnKey(_dictionary, phoneNums) ? _dictionary[phoneNums] : [],
        topWords = [];

    // Buckets are stored in ascending order of count, so walk them backwards
    for (var i = buckets.length - 1; i >= 0 && topWords.length < limit; i--) {
      topWords = topWords.concat(buckets[i]);
    }

    return topWords.slice(0, limit);
  }
};
return T9Dictionary; | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ah clever I never even considered turning existing dictionary words into their phone number equivalent!