Last active
June 10, 2021 00:16
-
-
Save mocobeta/6738792 to your computer and use it in GitHub Desktop.
途中結果を表示しながら編集距離(レーベンシュタイン距離)を計算する。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*- coding: utf-8 -*- | |
def ed(s1, s2, detail=False): | |
u""" s1, s2 の編集距離を計算する. ※置換のコストは 1 """ | |
len_s1 = len(s1) | |
len_s2 = len(s2) | |
# initialize | |
m = [[0 for i in range(len_s2 + 1)] for j in range(len_s1 + 1)] | |
for i in range(1, (len_s1+1)): | |
m[i][0] = i | |
if detail: show_status(s1, s2, m, i, 0) | |
for j in range(1, (len_s2+1)): | |
m[0][j] = j | |
if detail: show_status(s1, s2, m, 0, j) | |
# calculate edit distance | |
for i in range(1, len_s1+1): | |
for j in range(1, len_s2+1): | |
m[i][j] = min( | |
m[i-1][j-1] + (0 if s1[i-1] == s2[j-1] else 1), | |
m[i-1][j] + 1, | |
m[i][j-1] + 1 | |
) | |
if detail: show_status(s1, s2, m, i, j) | |
return m[len_s1][len_s2] | |
def show_status(s1, s2, m, i, j): | |
u""" 中間結果の表示 """ | |
if i == 0: | |
print '"" と %s の編集距離は %d' % (s2[:j], m[i][j]) | |
elif j == 0: | |
print '%s と "" の編集距離は %d' % (s1[:i], m[i][j]) | |
else: | |
print '%s と %s の編集距離は %d' % (s1[:i], s2[:j], m[i][j]) | |
print ' ' * 2 + '|' + ' ' * 6 + ' '.join(s2) | |
print '-' * 2 + '+' + '---' * (len(s2)+2) | |
for k, r in enumerate(m): | |
print s1[k-1] if k > 0 else ' ', | |
print '|', | |
print ''.join(['%3d' % e | |
if (k == 0 and l == 0) or (k < i and l < j) or e > 0 | |
else ' ' * 2 +'*' | |
for l, e in enumerate(r)]) | |
print '' | |
if __name__ == '__main__': | |
import sys | |
if len(sys.argv) < 3: | |
print 'Usage: ed.py <str1> <str2>' | |
sys.exit(0) | |
s1 = sys.argv[1] | |
s2 = sys.argv[2] | |
print '%s と %s の編集距離: %d' % (s1, s2, ed(s1, s2, True)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(例) dog と box の編集距離 | |
$ python ed.py dog box | |
d と "" の編集距離は 1 | |
| b o x | |
--+--------------- | |
| 0 * * * | |
d | 1 * * * | |
o | * * * * | |
g | * * * * | |
do と "" の編集距離は 2 | |
| b o x | |
--+--------------- | |
| 0 * * * | |
d | 1 * * * | |
o | 2 * * * | |
g | * * * * | |
dog と "" の編集距離は 3 | |
| b o x | |
--+--------------- | |
| 0 * * * | |
d | 1 * * * | |
o | 2 * * * | |
g | 3 * * * | |
"" と b の編集距離は 1 | |
| b o x | |
--+--------------- | |
| 0 1 * * | |
d | 1 * * * | |
o | 2 * * * | |
g | 3 * * * | |
"" と bo の編集距離は 2 | |
| b o x | |
--+--------------- | |
| 0 1 2 * | |
d | 1 * * * | |
o | 2 * * * | |
g | 3 * * * | |
"" と box の編集距離は 3 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 * * * | |
o | 2 * * * | |
g | 3 * * * | |
d と b の編集距離は 1 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 1 * * | |
o | 2 * * * | |
g | 3 * * * | |
d と bo の編集距離は 2 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 1 2 * | |
o | 2 * * * | |
g | 3 * * * | |
d と box の編集距離は 3 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 1 2 3 | |
o | 2 * * * | |
g | 3 * * * | |
do と b の編集距離は 2 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 1 2 3 | |
o | 2 2 * * | |
g | 3 * * * | |
do と bo の編集距離は 1 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 1 2 3 | |
o | 2 2 1 * | |
g | 3 * * * | |
do と box の編集距離は 2 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 1 2 3 | |
o | 2 2 1 2 | |
g | 3 * * * | |
dog と b の編集距離は 3 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 1 2 3 | |
o | 2 2 1 2 | |
g | 3 3 * * | |
dog と bo の編集距離は 2 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 1 2 3 | |
o | 2 2 1 2 | |
g | 3 3 2 * | |
dog と box の編集距離は 2 | |
| b o x | |
--+--------------- | |
| 0 1 2 3 | |
d | 1 1 2 3 | |
o | 2 2 1 2 | |
g | 3 3 2 2 | |
dog と box の編集距離: 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment