corbanbrook · October 26, 2009 18:06
diff --git a/gistfile1.rb b/gistfile1.rb
 # Didn't realize this but Brian Adkins wrote a ruby version as well, code is almost identical to my own.

 def words text
  text.downcase.scan(/[a-z]+/)
 end

 def train features
  model = Hash.new(1)
  features.each {|f| model[f] += 1 }
  return model
 end

 NWORDS = train(words(File.new('holmes.txt').read))
 LETTERS = ("a".."z").to_a.join

 def edits1 word
  n = word.length
  deletion = (0...n).collect {|i| word[0...i]+word[i+1..-1] }
  transposition = (0...n-1).collect {|i| word[0...i]+word[i+1,1]+word[i,1]+word[i+2..-1] }
  alteration = []
  n.times {|i| LETTERS.each_byte {|l| alteration << word[0...i]+l.chr+word[i+1..-1] } }
  insertion = []
  (n+1).times {|i| LETTERS.each_byte {|l| insertion << word[0...i]+l.chr+word[i..-1] } }
  result = deletion + transposition + alteration + insertion
  result.empty? ? nil : result
 end

 def known_edits2 word
  result = []
  edits1(word).each {|e1| edits1(e1).each {|e2| result << e2 if NWORDS.has_key?(e2) }}
  result.empty? ? nil : result
 end

 def known words
  result = words.find_all {|w| NWORDS.has_key?(w) }
  result.empty? ? nil : result
 end

 def correct word
  (known([word]) or known(edits1(word)) or known_edits2(word) or
    [word]).max {|a,b| NWORDS[a] <=> NWORDS[b] }
 end
diff --git a/spell_correct.py b/spell_correct.py
 # Jordan Baker's Python Presentation: http://www.slideshare.net/hexsprite/a-taste-of-python-devdays-toronto-2009

 # 23 lines of Python code by Peter Norvig http://norvig.com/spell-correct.html

 import re, collections

 def words(text): 
    return re.findall('[a-z]+', text.lower()) 

 def train(features):
    model = collections.defaultdict(lambda: 1)
    for f in features:
        model[f] += 1
    return model

 NWORDS = train(words(file('big.txt').read()))

 alphabet = 'abcdefghijklmnopqrstuvwxyz'

 def edits1(word):
   s = [(word[:i], word[i:]) for i in range(len(word) + 1)]
   deletes    = [a + b[1:] for a, b in s if b]
   transposes = [a + b[1] + b[0] + b[2:] for a, b in s if len(b)>1]
   replaces   = [a + c + b[1:] for a, b in s for c in alphabet if b]
   inserts    = [a + c + b     for a, b in s for c in alphabet]
   return set(deletes + transposes + replaces + inserts)

 def known_edits2(word):
    return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS)

 def known(words): return set(w for w in words if w in NWORDS)

 def correct(word):
    candidates = known([word]) or known(edits1(word)) or known_edits2(word) or [word]
    return max(candidates, key=NWORDS.get)
diff --git a/spell_correct.rb b/spell_correct.rb
 # 30 lines of Ruby code by Corban Brook (24 lines exluding 'end')

 def words(text)
  text.downcase.scan /[a-z]+/
 end

 def train(words)
  model = Hash.new(0)
  words.each { |w| model[w] += 1 }
  model
 end

 NWORDS = train(words(File.read('big.txt')))
 ALPHABET = *('a'..'z')

 def edits1(word)
  s = (0..word.length).collect { |i| [word[0, i], word[i, word.length]] }
  deletes    = s.collect { |a, b| a + b[1, b.length] if b.any? }.compact
  transposes = s.collect { |a, b| a + b[1, 1] + b[0, 1] + b[2, b.length] if b.length > 1 }.compact
  replaces   = s.collect { |a, b| ALPHABET.collect { |c| a + c + b[1, b.length] if b.any? } }.flatten.compact
  inserts    = s.collect { |a, b| ALPHABET.collect { |c| a + c + b } }.flatten
  deletes + transposes + replaces + inserts
 end

 def known_edits2(word)
  result = []
  edits1(word).each { |e1| result += edits1(e1).select { |e2| NWORDS.has_key? e2 } }
  result.empty? ? nil : result.uniq
 end

 def known(words)
  result = words.select { |w| NWORDS.has_key? w }
  result.empty? ? nil : result
 end

 def correct(word)
  (known([word]) or known(edits1(word)) or known_edits2(word) or [word]).max { |a, b| NWORDS[a] <=> NWORDS[b] }
 end
	# Didn't realize this but Brian Adkins wrote a ruby version as well, code is almost identical to my own.

	def words text
	text.downcase.scan(/[a-z]+/)
	end

	def train features
	model = Hash.new(1)
	features.each {\|f\| model[f] += 1 }
	return model
	end

	NWORDS = train(words(File.new('holmes.txt').read))
	LETTERS = ("a".."z").to_a.join

	def edits1 word
	n = word.length
	deletion = (0...n).collect {\|i\| word[0...i]+word[i+1..-1] }
	transposition = (0...n-1).collect {\|i\| word[0...i]+word[i+1,1]+word[i,1]+word[i+2..-1] }
	alteration = []
	n.times {\|i\| LETTERS.each_byte {\|l\| alteration << word[0...i]+l.chr+word[i+1..-1] } }
	insertion = []
	(n+1).times {\|i\| LETTERS.each_byte {\|l\| insertion << word[0...i]+l.chr+word[i..-1] } }
	result = deletion + transposition + alteration + insertion
	result.empty? ? nil : result
	end

	def known_edits2 word
	result = []
	edits1(word).each {\|e1\| edits1(e1).each {\|e2\| result << e2 if NWORDS.has_key?(e2) }}
	result.empty? ? nil : result
	end

	def known words
	result = words.find_all {\|w\| NWORDS.has_key?(w) }
	result.empty? ? nil : result
	end

	def correct word
	(known([word]) or known(edits1(word)) or known_edits2(word) or
	[word]).max {\|a,b\| NWORDS[a] <=> NWORDS[b] }
	end
	# Jordan Baker's Python Presentation: http://www.slideshare.net/hexsprite/a-taste-of-python-devdays-toronto-2009

	# 23 lines of Python code by Peter Norvig http://norvig.com/spell-correct.html

	import re, collections

	def words(text):
	return re.findall('[a-z]+', text.lower())

	def train(features):
	model = collections.defaultdict(lambda: 1)
	for f in features:
	model[f] += 1
	return model

	NWORDS = train(words(file('big.txt').read()))

	alphabet = 'abcdefghijklmnopqrstuvwxyz'

	def edits1(word):
	s = [(word[:i], word[i:]) for i in range(len(word) + 1)]
	deletes = [a + b[1:] for a, b in s if b]
	transposes = [a + b[1] + b[0] + b[2:] for a, b in s if len(b)>1]
	replaces = [a + c + b[1:] for a, b in s for c in alphabet if b]
	inserts = [a + c + b for a, b in s for c in alphabet]
	return set(deletes + transposes + replaces + inserts)

	def known_edits2(word):
	return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS)

	def known(words): return set(w for w in words if w in NWORDS)

	def correct(word):
	candidates = known([word]) or known(edits1(word)) or known_edits2(word) or [word]
	return max(candidates, key=NWORDS.get)
	# 30 lines of Ruby code by Corban Brook (24 lines exluding 'end')

	def words(text)
	text.downcase.scan /[a-z]+/
	end

	def train(words)
	model = Hash.new(0)
	words.each { \|w\| model[w] += 1 }
	model
	end

	NWORDS = train(words(File.read('big.txt')))
	ALPHABET = *('a'..'z')

	def edits1(word)
	s = (0..word.length).collect { \|i\| [word[0, i], word[i, word.length]] }
	deletes = s.collect { \|a, b\| a + b[1, b.length] if b.any? }.compact
	transposes = s.collect { \|a, b\| a + b[1, 1] + b[0, 1] + b[2, b.length] if b.length > 1 }.compact
	replaces = s.collect { \|a, b\| ALPHABET.collect { \|c\| a + c + b[1, b.length] if b.any? } }.flatten.compact
	inserts = s.collect { \|a, b\| ALPHABET.collect { \|c\| a + c + b } }.flatten
	deletes + transposes + replaces + inserts
	end

	def known_edits2(word)
	result = []
	edits1(word).each { \|e1\| result += edits1(e1).select { \|e2\| NWORDS.has_key? e2 } }
	result.empty? ? nil : result.uniq
	end

	def known(words)
	result = words.select { \|w\| NWORDS.has_key? w }
	result.empty? ? nil : result
	end

	def correct(word)
	(known([word]) or known(edits1(word)) or known_edits2(word) or [word]).max { \|a, b\| NWORDS[a] <=> NWORDS[b] }
	end