Created
August 31, 2011 22:20
-
-
Save gjcourt/1184893 to your computer and use it in GitHub Desktop.
Spellcheck
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import pprint | |
import os | |
path = os.path.join(os.path.dirname(__file__), 'words.txt') | |
vowel_pattern = re.compile('[aeiou]', re.IGNORECASE) | |
def run(): | |
"""runs the program""" | |
input = raw_input('>') | |
suggestions = spellcheck(input) | |
if len(suggestions) == 1: | |
print 'did you mean ' + suggestions[0].group() + '?' | |
run() | |
if len(suggestions) > 1: | |
min = 1024 | |
best = None | |
for s in suggestions: | |
str = s.group() | |
val = calcDistance(input, str) | |
if val < min: | |
min = val | |
best = str | |
print 'did you mean ' + best + '?' | |
run() | |
else: | |
print 'no suggestions' | |
run() | |
def buildPattern(word): | |
"""takes a word and builds a pattern | |
based on the defined rules""" | |
lst = ['^'] | |
prevChar = None | |
#build regular expression | |
for i in range(len(word)): | |
if prevChar and prevChar == word[i]: | |
if vowel_pattern.match(word[i]): | |
lst.append('[aeiou]*') | |
else: | |
lst.append(word[i] + '*') | |
else: | |
if vowel_pattern.match(word[i]): | |
lst.append('[aeiou]') | |
else: | |
lst.append(word[i]) | |
#set the previous char | |
prevChar = word[i] | |
#return the regular expression | |
lst.append('$') | |
return re.compile(''.join(lst), re.IGNORECASE) | |
def spellcheck(word): | |
"""tests a dictionary based on a set of rules | |
defined by the buildPattern definition""" | |
pattern = buildPattern(word) | |
dictionary = open(path, 'r') | |
suggestions = [] | |
#loop over words in the dictionary | |
for word in dictionary: | |
m = pattern.match(word) | |
if m: | |
suggestions.append(m) | |
return suggestions | |
def calcDistance(s, t): | |
# build multi-dimensional array | |
d = [[0] * (len(t) + 1) for i in range(len(s) + 1)] | |
for i in range(len(s)+1): | |
d[i][0] = i | |
for j in range(len(t)+1): | |
d[0][j] = j | |
#build table | |
for j in [x+1 for x in range(len(t))]: | |
for i in [x+1 for x in range(len(s))]: | |
a = i-1 | |
b = j-1 | |
if t[b] == s[a]: | |
d[i][j] = d[i-1][j-1] | |
else: | |
d[i][j] = min( d[i-1][j] + 1, d[i][j-1] + 1, d[i-1][j-1] + 1) | |
#return replacements | |
return d[len(s)][len(t)] | |
if __name__ == "__main__": | |
"""test running module for the | |
program""" | |
run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment