Skip to content

Instantly share code, notes, and snippets.

@tarneaux
Last active January 24, 2024 20:44
Show Gist options
  • Save tarneaux/bf748c99f28555d6477531505eb00d1b to your computer and use it in GitHub Desktop.
Save tarneaux/bf748c99f28555d6477531505eb00d1b to your computer and use it in GitHub Desktop.
Levenshtein and Damerau-Levenshtein distances in Python
# This is my (tarneaux's) implementation of the Levenshtein and Damerau-Levenshtein distances,
# in a recursive and non-optimized manner based on the formulas on Wikipedia.
# For more complete and/or optimized versions of them see:
# - https://github.com/TheAlgorithms/Python/blob/master/strings/levenshtein_distance.py
# - https://github.com/TheAlgorithms/Python/blob/master/strings/damerau_levenshtein_distance.py
# See https://en.wikipedia.org/wiki/Levenshtein_distance
def levenshtein(s1, s2):
if len(s1) == 0:
return len(s2)
elif len(s2) == 0:
return len(s1)
elif s1[0] == s2[0]:
# the character is the same, nothing changes.
return levenshtein(s1[1:], s2[1:])
else:
return 1 + min(
levenshtein(s1[1:], s2), # deletion
levenshtein(s1, s2[1:]), # insertion
levenshtein(s1[1:], s2[1:]) # substitution
)
# Helper for Damerau-Levenshtein distance
def indicator(s1, s2):
return 0 if s1[-1] == s2[-1] else 1
# Also allows transpositions in its operations
# See https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
def damerau_levenshtein(s1, s2):
values = []
if len(s1) == 0 and len(s2) == 0:
# the strings are empty, nothing changes.
values.append(0)
if len(s1) > 0:
# there's something in s1, try to remove it!
values.append(damerau_levenshtein(s1[:-1], s2) + 1)
if len(s2) > 0:
# there's something in s2, try to remove it!
values.append(damerau_levenshtein(s1, s2[:-1]) + 1)
if len(s1) > 0 and len(s2) > 0:
# there's something in both s1 and s2, try to change it!
values.append(damerau_levenshtein(s1[:-1], s2[:-1]) + indicator(s1, s2))
if len(s1) > 1 and len(s2) > 1 and s1[-2] == s2[-1] and s1[-1] == s2[-2]:
# we can try a transposition between s1 and s2's last characters.
values.append(damerau_levenshtein(s1[:-2], s2[:-2]) + indicator(s1, s2))
return min(values)
if __name__ == "__main__":
s1, s2 = "hellow", "hellwo"
print(damerau_levenshtein(s1, s2))
print(levenshtein(s1, s2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment