Created
December 24, 2010 03:35
Node.js as an alertnative to Lisp ;-)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Node.js as an alertnative to Lisp ;-) | |
// see http://norvig.com/java-lisp.html | |
// and http://www.flownet.com/ron/papers/lisp-java/instructions.html | |
// all we need from node.js is file reading | |
var fs = require('fs'); | |
// mappings from characters to digits | |
// (we'll use this to build a mapping from | |
// digit sequences to dictionary words) | |
var char2int = { | |
e: 0, | |
j: 1, n: 1, q: 1, | |
r: 2, w: 2, x: 2, | |
d: 3, s: 3, y: 3, | |
f: 4, t: 4, | |
a: 5, m: 5, | |
c: 6, i: 6, v: 6, | |
b: 7, k: 7, u: 7, | |
l: 8, o: 8, p: 8, | |
g: 9, h: 9, z: 9 | |
} | |
// convenience for mapping arrays: | |
function lookup(c) { return char2int[c] }; | |
// open the dictionary as one big string | |
var rawDictionary = fs.readFileSync('dictionary.txt', 'utf8'); | |
// build a mapping from clean (lowercase, no punctuation) | |
// words to original words from the dictionary, ignore whitespace | |
var dictionary = {}; | |
rawDictionary.split('\n').forEach(function(word) { | |
word = word.trim(); | |
if (word.length > 0) { | |
dictionary[word.replace(/[\"\-]/g,'').toLowerCase()] = word; | |
} | |
}); | |
// build a mapping from digit encodings for each dictionary word | |
var encoded = {}; | |
Object.keys(dictionary).forEach(function(word) { | |
var digits = word.split('').map(lookup).join(''); | |
if (!(digits in encoded)) { | |
encoded[digits] = []; | |
} | |
encoded[digits].push(dictionary[word]); | |
}); | |
// stream the input so it can be any length: | |
var input = fs.createReadStream('input.txt'); | |
var line = ''; | |
// streams don't guarantee that we'll get whole lines, so | |
// we need to look for line endings as we go | |
input.on('data', function(chunk) { | |
line += chunk; | |
var index = line.indexOf('\n'); | |
if (index >= 0) { | |
emit(line.slice(0,index)); | |
line = line.slice(index+1); | |
} | |
}); | |
// when we get to the end of the stream, use whatever's left | |
input.on('end', function() { | |
var lines = line.split('\n'); | |
lines.forEach(emit); | |
}); | |
// take a line, find the numbers and emit matching encodings | |
function emit(line) { | |
var digits = line.match(/\d/g); | |
var matches = null; | |
if (digits && digits.length > 0) { | |
matches = findMatches(digits); | |
} | |
if (matches && matches.length) { | |
for (var i = 0; i < matches.length; i++) { | |
console.log('%s: %s', line, matches[i]); | |
} | |
} | |
} | |
// take an array of digits and search for encodings | |
function findMatches(digits) { | |
// single digits are left alone | |
if (digits.length == 1) return digits; | |
// now search for matches at the beginning of digits | |
var test = ''; | |
var starts = []; | |
for (var i = digits.length; i >= 1; i--) { | |
test = digits.slice(0,i).join(''); | |
if (test in encoded) { | |
starts = starts.concat(encoded[test]) | |
} | |
} | |
// if we didn't find any, preserve the first digit | |
// and search again. | |
// don't automatically add the start because then | |
// we'd need to catch repeat digits. | |
if (starts.length == 0) { | |
for (var i = digits.length; i >= 2; i--) { | |
test = digits.slice(1,i).join(''); | |
if (test in encoded) { | |
encoded[test].forEach(function(w) { | |
starts.push(digits[0] + ' ' + w); | |
}); | |
} | |
} | |
} | |
// now take each of those initial matches and either | |
// (a) finish with a single digit | |
// (b) recurse using the remaining digits, or | |
// (c) return the match if it's consumed all the digits | |
var matches = []; | |
starts.forEach(function(s) { | |
var consumed = s.replace(/[\"\- ]/g,'').trim().length; | |
if (digits.length - consumed == 1) { | |
matches.push(s + ' ' + digits[digits.length-1]); | |
} | |
else if (consumed < digits.length) { | |
var next = findMatches(digits.slice(consumed)); | |
if (next.length) { | |
next.forEach(function(n) { | |
matches.push(s + ' ' + n); | |
}); | |
} | |
// if next doesn't contain anything, discard this match | |
} | |
else { | |
matches.push(s); | |
} | |
}); | |
return matches; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment