Skip to content

Instantly share code, notes, and snippets.

@jamiees2
Created May 10, 2013 20:39
Show Gist options
  • Select an option

  • Save jamiees2/5557220 to your computer and use it in GitHub Desktop.

Select an option

Save jamiees2/5557220 to your computer and use it in GitHub Desktop.
# Enter your code here. Read input from STDIN. Print output to STDOUT
import re
def read_corpus():
corpus = {}
with open('corpus.txt') as c:
for line in c:
words = re.findall(r"[a-z]+",line.strip().lower())
for word in words:
if word is '':
continue
if not word in corpus:
corpus[word] = 1
else:
corpus[word] += 1
return corpus
def levenshtein(a,b):
'''Calculates the Levenshtein distance between a and b.
From http://hetland.org/coding/python/levenshtein.py'''
n, m = len(a), len(b)
if n > m:
# Make sure n <= m, to use O(min(n,m)) space
a,b = b,a
n,m = m,n
current = range(n+1)
for i in range(1,m+1):
previous, current = current, [i]+[0]*n
for j in range(1,n+1):
add, delete = previous[j]+1, current[j-1]+1
change = previous[j-1]
if a[j-1] != b[i-1]:
change = change + 1
current[j] = min(add, delete, change)
return current[n]
def find_lowest(corpus,value):
min_count = 2**20
min_val = 0
min_key = None
for key,val in corpus.iteritems():
lev = levenshtein(key,value)
if lev < min_count:
min_count = lev
min_val = val
min_key = key
elif lev is min_count:
if val > min_val:
min_count = lev
min_val = val
min_key = key
return min_key
corpus = read_corpus()
N = input()
for i in xrange(N):
print find_lowest(corpus,raw_input().strip())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment