Skip to content

Instantly share code, notes, and snippets.

@tomoemon
Last active April 21, 2016 10:58
Show Gist options
  • Save tomoemon/ca02045cf4097d19b548aedba884a28e to your computer and use it in GitHub Desktop.
Save tomoemon/ca02045cf4097d19b548aedba884a28e to your computer and use it in GitHub Desktop.
testing Damerau-Levenshtein
def damerau_levenshtein_distance(s1, s2):
from pprint import pprint
from collections import defaultdict
if isinstance(s1, bytes) or isinstance(s2, bytes):
raise TypeError(_no_bytes_err)
len1 = len(s1)
len2 = len(s2)
infinite = len1 + len2
# character array
da = defaultdict(int)
# distance matrix
score = [[0]*(len2+2) for x in range(len1+2)]
score[0][0] = infinite
for i in range(0, len1+1):
score[i+1][0] = infinite
score[i+1][1] = i
for i in range(0, len2+1):
score[0][i+1] = infinite
score[1][i+1] = i
for i in range(1, len1+1):
db = 0
print("s1:" , s1[i-1])
for j in range(1, len2+1):
print("s2:", s2[j-1])
pprint(score)
i1 = da[s2[j-1]]
j1 = db
cost = 1
if s1[i-1] == s2[j-1]:
cost = 0
db = j
print("i:{}, j:{}, i1:{}, j1:{}, db:{}, da:{}".format(i, j, i1, j1, db, da))
min_score = min(score[i][j] + cost,
score[i+1][j] + 1,
score[i][j+1] + 1,
score[i1][j1] + (i-i1-1) + 1 + (j-j1-1))
score[i+1][j+1] = min_score
# for debugging
min_types = []
if min_score == score[i][j] + cost:
min_types.append(("no " if cost == 0 else "") + "substitution")
if min_score == score[i+1][j] + 1:
min_types.append("insertion")
if min_score == score[i][j+1] + 1:
min_types.append("deletion")
if min_score == score[i1][j1] + (i-i1-1) + 1 + (j-j1-1):
min_types.append("transposition")
print("min: {} ({})".format(min_score, min_types))
print()
da[s1[i-1]] = i
print("finished")
pprint(score)
return score[len1+1][len2+1]
"""
>>> damerau_levenshtein_distance("cat", "tact")
s1: c
s2: t
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 0, 0, 0, 0],
[7, 2, 0, 0, 0, 0],
[7, 3, 0, 0, 0, 0]]
i:1, j:1, i1:0, j1:0, db:0, da:defaultdict(<class 'int'>, {'t': 0})
min: 1 (['substitution'])
s2: a
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 0, 0, 0],
[7, 2, 0, 0, 0, 0],
[7, 3, 0, 0, 0, 0]]
i:1, j:2, i1:0, j1:0, db:0, da:defaultdict(<class 'int'>, {'t': 0, 'a': 0})
min: 2 (['substitution', 'insertion'])
s2: c
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 0, 0],
[7, 2, 0, 0, 0, 0],
[7, 3, 0, 0, 0, 0]]
i:1, j:3, i1:0, j1:0, db:3, da:defaultdict(<class 'int'>, {'t': 0, 'a': 0, 'c': 0})
min: 2 (['no substitution'])
s2: t
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 0],
[7, 2, 0, 0, 0, 0],
[7, 3, 0, 0, 0, 0]]
i:1, j:4, i1:0, j1:3, db:3, da:defaultdict(<class 'int'>, {'t': 0, 'a': 0, 'c': 0})
min: 3 (['insertion'])
s1: a
s2: t
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 3],
[7, 2, 0, 0, 0, 0],
[7, 3, 0, 0, 0, 0]]
i:2, j:1, i1:0, j1:0, db:0, da:defaultdict(<class 'int'>, {'t': 0, 'a': 0, 'c': 1})
min: 2 (['substitution', 'deletion'])
s2: a
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 3],
[7, 2, 2, 0, 0, 0],
[7, 3, 0, 0, 0, 0]]
i:2, j:2, i1:0, j1:0, db:2, da:defaultdict(<class 'int'>, {'t': 0, 'a': 0, 'c': 1})
min: 1 (['no substitution'])
s2: c
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 3],
[7, 2, 2, 1, 0, 0],
[7, 3, 0, 0, 0, 0]]
i:2, j:3, i1:1, j1:2, db:2, da:defaultdict(<class 'int'>, {'t': 0, 'a': 0, 'c': 1})
min: 2 (['insertion', 'transposition'])
s2: t
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 3],
[7, 2, 2, 1, 2, 0],
[7, 3, 0, 0, 0, 0]]
i:2, j:4, i1:0, j1:2, db:2, da:defaultdict(<class 'int'>, {'t': 0, 'a': 0, 'c': 1})
min: 3 (['substitution', 'insertion'])
s1: t
s2: t
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 3],
[7, 2, 2, 1, 2, 3],
[7, 3, 0, 0, 0, 0]]
i:3, j:1, i1:0, j1:0, db:1, da:defaultdict(<class 'int'>, {'t': 0, 'a': 2, 'c': 1})
min: 2 (['no substitution'])
s2: a
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 3],
[7, 2, 2, 1, 2, 3],
[7, 3, 2, 0, 0, 0]]
i:3, j:2, i1:2, j1:1, db:1, da:defaultdict(<class 'int'>, {'t': 0, 'a': 2, 'c': 1})
min: 2 (['deletion', 'transposition'])
s2: c
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 3],
[7, 2, 2, 1, 2, 3],
[7, 3, 2, 2, 0, 0]]
i:3, j:3, i1:1, j1:1, db:1, da:defaultdict(<class 'int'>, {'t': 0, 'a': 2, 'c': 1})
min: 2 (['substitution'])
s2: t
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 3],
[7, 2, 2, 1, 2, 3],
[7, 3, 2, 2, 2, 0]]
i:3, j:4, i1:0, j1:1, db:4, da:defaultdict(<class 'int'>, {'t': 0, 'a': 2, 'c': 1})
min: 2 (['no substitution'])
finished
[[7, 7, 7, 7, 7, 7],
[7, 0, 1, 2, 3, 4],
[7, 1, 1, 2, 2, 3],
[7, 2, 2, 1, 2, 3],
[7, 3, 2, 2, 2, 2]]
2
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment