Skip to content

Instantly share code, notes, and snippets.

@n-bar
Created March 19, 2016 18:34
Show Gist options
  • Save n-bar/0a179f782ea99b05e612 to your computer and use it in GitHub Desktop.
Save n-bar/0a179f782ea99b05e612 to your computer and use it in GitHub Desktop.
# https://ru.wikibooks.org/wiki/Реализации_алгоритмов/Расстояние_Левенштейна#Python
def distance(a, b):
"Calculates the Levenshtein distance between a and b."
n, m = len(a), len(b)
if n > m:
# Make sure n <= m, to use O(min(n,m)) space
a, b = b, a
n, m = m, n
current_row = range(n+1) # Keep current and previous row, not entire matrix
for i in range(1, m+1):
previous_row, current_row = current_row, [i]+[0]*n
for j in range(1,n+1):
add, delete, change = previous_row[j]+1, current_row[j-1]+1, previous_row[j-1]
if a[j-1] != b[i-1]:
change += 1
current_row[j] = min(add, delete, change)
return current_row[n]
# https://habrahabr.ru/post/279585/
def distance_2(text, pattern):
"Calculates the Levenshtein distance between text and pattern."
text_len, pattern_len = len(text), len(pattern)
current_column = range(pattern_len+1)
min_value = pattern_len
end_pos = 0
for i in range(1, text_len+1):
previous_column, current_column = current_column, [0]*(pattern_len+1) # !!!
for j in range(1,pattern_len+1):
add, delete, change = previous_column[j]+1, current_column[j-1]+1, previous_column[j-1]
if pattern[j-1] != text[i-1]:
change += 1
current_column[j] = min(add, delete, change)
if min_value > current_column[pattern_len]: # !!!
min_value = current_column[pattern_len]
end_pos = i
return min_value, end_pos
print distance_2(u'аргумент', u'рудимент') #3, 8
print distance_2(u'весомый аргумент в мою пользу', u'рудимент') #3, 16
def distance_3(text, pattern):
min_value, end_pos = distance_2(text, pattern)
min_value, start_pos = distance_2(text[end_pos-1::-1], pattern[::-1])
start_pos = end_pos - start_pos
return min_value, start_pos, end_pos, text[start_pos:end_pos], pattern
print distance_3(u'аргумент', u'рудимент') # 3 3 8 умент рудимент
print distance_3(u'весомый аргумент в мою пользу', u'рудимент') # 3 11 16 умент рудимент
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment