Created
September 28, 2012 11:49
-
-
Save shihongzhi/3799352 to your computer and use it in GitHub Desktop.
编辑距离damerau_levenshtein算法 http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def damerau_levenshtein_distance(source, target): | |
if not source and not target: | |
return 0 | |
elif not source: | |
return len(target) | |
elif not target: | |
return len(source) | |
m = len(source) | |
n = len(target) | |
h = [[0 for col in range(n+2)] for row in range(m+2)] | |
inf = m + n | |
h[0][0] = inf | |
for i in range(m+1): | |
h[i+1][1] = i | |
h[i+1][0] = inf | |
for j in range(n+1): | |
h[1][j+1] = j | |
h[0][j+1] = inf | |
sd = {} | |
for x in source+target: | |
if x not in sd: | |
sd[x] = 0 | |
for i in range(1, m+1): | |
db = 0 | |
for j in range(1, n+1): | |
i1 = sd[target[j-1]] | |
j1 = db | |
if source[i-1] == target[j-1]: | |
h[i+1][j+1] = h[i][j] | |
db = j | |
else: | |
h[i+1][j+1] = min(h[i][j], h[i+1][j], h[i][j+1]) + 1 | |
h[i+1][j+1] = min(h[i+1][j+1], h[i1][j1] + (i-i1-1) + 1 + (j-j1-1)) | |
sd[source[i-1]] = i; | |
return h[m+1][n+1] | |
print damerau_levenshtein_distance('CA', 'ABC') | |
===》 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment