Last active
January 24, 2024 20:44
-
-
Save tarneaux/bf748c99f28555d6477531505eb00d1b to your computer and use it in GitHub Desktop.
Levenshtein and Damerau-Levenshtein distances in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is my (tarneaux's) implementation of the Levenshtein and Damerau-Levenshtein distances, | |
# in a recursive and non-optimized manner based on the formulas on Wikipedia. | |
# For more complete and/or optimized versions of them see: | |
# - https://github.com/TheAlgorithms/Python/blob/master/strings/levenshtein_distance.py | |
# - https://github.com/TheAlgorithms/Python/blob/master/strings/damerau_levenshtein_distance.py | |
# See https://en.wikipedia.org/wiki/Levenshtein_distance | |
def levenshtein(s1, s2): | |
if len(s1) == 0: | |
return len(s2) | |
elif len(s2) == 0: | |
return len(s1) | |
elif s1[0] == s2[0]: | |
# the character is the same, nothing changes. | |
return levenshtein(s1[1:], s2[1:]) | |
else: | |
return 1 + min( | |
levenshtein(s1[1:], s2), # deletion | |
levenshtein(s1, s2[1:]), # insertion | |
levenshtein(s1[1:], s2[1:]) # substitution | |
) | |
# Helper for Damerau-Levenshtein distance | |
def indicator(s1, s2): | |
return 0 if s1[-1] == s2[-1] else 1 | |
# Also allows transpositions in its operations | |
# See https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance | |
def damerau_levenshtein(s1, s2): | |
values = [] | |
if len(s1) == 0 and len(s2) == 0: | |
# the strings are empty, nothing changes. | |
values.append(0) | |
if len(s1) > 0: | |
# there's something in s1, try to remove it! | |
values.append(damerau_levenshtein(s1[:-1], s2) + 1) | |
if len(s2) > 0: | |
# there's something in s2, try to remove it! | |
values.append(damerau_levenshtein(s1, s2[:-1]) + 1) | |
if len(s1) > 0 and len(s2) > 0: | |
# there's something in both s1 and s2, try to change it! | |
values.append(damerau_levenshtein(s1[:-1], s2[:-1]) + indicator(s1, s2)) | |
if len(s1) > 1 and len(s2) > 1 and s1[-2] == s2[-1] and s1[-1] == s2[-2]: | |
# we can try a transposition between s1 and s2's last characters. | |
values.append(damerau_levenshtein(s1[:-2], s2[:-2]) + indicator(s1, s2)) | |
return min(values) | |
if __name__ == "__main__": | |
s1, s2 = "hellow", "hellwo" | |
print(damerau_levenshtein(s1, s2)) | |
print(levenshtein(s1, s2)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment