Skip to content

Instantly share code, notes, and snippets.

@gjcourt
Created August 31, 2011 22:20
Show Gist options
  • Save gjcourt/1184893 to your computer and use it in GitHub Desktop.
Save gjcourt/1184893 to your computer and use it in GitHub Desktop.
Spellcheck
import re
import pprint
import os
path = os.path.join(os.path.dirname(__file__), 'words.txt')
vowel_pattern = re.compile('[aeiou]', re.IGNORECASE)
def run():
"""runs the program"""
input = raw_input('>')
suggestions = spellcheck(input)
if len(suggestions) == 1:
print 'did you mean ' + suggestions[0].group() + '?'
run()
if len(suggestions) > 1:
min = 1024
best = None
for s in suggestions:
str = s.group()
val = calcDistance(input, str)
if val < min:
min = val
best = str
print 'did you mean ' + best + '?'
run()
else:
print 'no suggestions'
run()
def buildPattern(word):
"""takes a word and builds a pattern
based on the defined rules"""
lst = ['^']
prevChar = None
#build regular expression
for i in range(len(word)):
if prevChar and prevChar == word[i]:
if vowel_pattern.match(word[i]):
lst.append('[aeiou]*')
else:
lst.append(word[i] + '*')
else:
if vowel_pattern.match(word[i]):
lst.append('[aeiou]')
else:
lst.append(word[i])
#set the previous char
prevChar = word[i]
#return the regular expression
lst.append('$')
return re.compile(''.join(lst), re.IGNORECASE)
def spellcheck(word):
"""tests a dictionary based on a set of rules
defined by the buildPattern definition"""
pattern = buildPattern(word)
dictionary = open(path, 'r')
suggestions = []
#loop over words in the dictionary
for word in dictionary:
m = pattern.match(word)
if m:
suggestions.append(m)
return suggestions
def calcDistance(s, t):
# build multi-dimensional array
d = [[0] * (len(t) + 1) for i in range(len(s) + 1)]
for i in range(len(s)+1):
d[i][0] = i
for j in range(len(t)+1):
d[0][j] = j
#build table
for j in [x+1 for x in range(len(t))]:
for i in [x+1 for x in range(len(s))]:
a = i-1
b = j-1
if t[b] == s[a]:
d[i][j] = d[i-1][j-1]
else:
d[i][j] = min( d[i-1][j] + 1, d[i][j-1] + 1, d[i-1][j-1] + 1)
#return replacements
return d[len(s)][len(t)]
if __name__ == "__main__":
"""test running module for the
program"""
run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment