Skip to content

Instantly share code, notes, and snippets.

@guilherme-miyake
Last active October 22, 2025 09:16
Show Gist options
  • Save guilherme-miyake/c32052c8f28a7c1591f5b319504cf4fa to your computer and use it in GitHub Desktop.
Save guilherme-miyake/c32052c8f28a7c1591f5b319504cf4fa to your computer and use it in GitHub Desktop.
Levenshtein Distance Options
# Refactor of: https://medium.com/@hebozhe/optimizing-levenshtein-distance-in-python-using-built-ins-only-8f6b37e9de89
def calculate_distance_to_last_position(matching_positions: list[tuple[int, int]]) -> int:
'''
summary: Calculate the minimum distance from (0, 0) to the end of
a list of Levenshtein coordinates.
params:
coords: list[tuple[int, int]] of (row, column) coordinates where
the characters of two strings were identical.
return: int of the Levenshtein distance.
ex: calculate_distance_to_last_position([(1, 1), (2, 5), (4, 2),
(6, 4), (7, 5), (8, 6), (9, 7)]) -> 3
{(1, 1): 0, (2, 5): 4, (4, 2): 3, (6, 4): 5, (7, 5): 6,
(8, 6): 7, (9, 7): 8}
{(2, 5): 3, (4, 2): 2, (6, 4): 4, (7, 5): 5, (8, 6): 6, (9, 7): 7}
{(4, 2): 2, (6, 4): 4, (7, 5): 5, (8, 6): 6, (9, 7): 7}
{(6, 4): 3, (7, 5): 4, (8, 6): 5, (9, 7): 6}
{(7, 5): 3, (8, 6): 4, (9, 7): 5}
{(8, 6): 3, (9, 7): 4}
{(9, 7): 3}
distances[final_position] == 3
chars: 58 + 733 = 791
'''
distances = {position: max(position) for position in matching_positions}
final_position = matching_positions[-1]
current_min_distance = distances[final_position]
matching_position_discount = -1
for reference in matching_positions[:-1]:
(reference_x, reference_y), reference_distance = reference, distances.pop(reference)
if reference_distance >= current_min_distance:
continue
positions_ahead = {
(position_x, position_y): position_distance
for (position_x, position_y), position_distance in distances.items()
if position_x > reference_x and position_y > reference_y and position_distance > reference_distance
}
distances_through_reference = {
(position_x, position_y):
min(position_distance, (reference_distance + matching_position_discount + max(position_x - reference_x, position_y - reference_y)))
for (position_x, position_y), position_distance in positions_ahead.items()
}
distances |= distances_through_reference
current_min_distance = min(distances[final_position], current_min_distance)
if min(distances.values()) == current_min_distance:
return current_min_distance
return distances[final_position]
def levshtein_distance_by_matching_positions(of_str_a: str, of_str_b: str) -> int:
'''
summary: Get the Levenshtein distance of two strings by finding
an optimal path from a list of character-matching coordinates.
params:
of_str_a: str used to measure Levenshtein distance.
of_str_b: str used to measure Levenshtein distance.
return: int of the Levenshtein distance between of_str_a and of_str_b.
chars: 60 + 404 = 464
'''
matching_positions = [(index_a, index_b)
for index_a, char_a in enumerate(of_str_a)
for index_b, char_b in enumerate(of_str_b)
if char_a == char_b]
matching_positions.append((len(of_str_a), len(of_str_b)))
return calculate_distance_to_last_position(matching_positions=matching_positions)
@lru_cache(maxsize=5000)
def edit_distance(a: str, b: str):
return recursive_edit_distance(a, b)
@lru_cache(maxsize=5000)
def recursive_edit_distance(a: str, b: str, distance: int = 0):
# no more characters in one substring
if len(a) == 0 or len(b) == 0:
return len(a) + len(b) + distance
# no change required
elif a[0] == b[0]:
return recursive_edit_distance(a[1:], b[1:], distance)
# change detected
return min(
recursive_edit_distance(a, b[1:], distance + 1), # insert character
recursive_edit_distance(a[1:], b, distance + 1), # delete character
recursive_edit_distance(a[1:], b[1:], distance + 1), # replace character
)
def a_star_edit_distance(starting_a: str, starting_b: str):
max_distance = max(len(starting_a), len(starting_b))
next_step = (max_distance, 0, starting_a, starting_b)
priority_heap = []
def a_star_reduce_distance(current_distance, steps, substring_a, substring_b):
if current_distance + steps - max(len(starting_a), len(starting_b)) > 2:
return None
if len(substring_a) == 0 or len(substring_b) == 0:
return current_distance
elif substring_a == substring_b:
return current_distance - len(substring_a)
elif substring_a[0] == substring_b[0]:
next_step = heapq.heappushpop(
priority_heap,
(current_distance - 1, steps, substring_a[1:], substring_b[1:]) # no change required
)
return next_step
else:
heapq.heappush(
priority_heap,
(current_distance, steps + 1, substring_a, substring_b[1:]) # skip 1 b = insert character
)
heapq.heappush(
priority_heap,
(current_distance, steps + 1, substring_a[1:], substring_b) # delete character
)
next_step = heapq.heappushpop(
priority_heap,
(current_distance, steps + 1, substring_a[1:], substring_b[1:]) # replace character
)
return next_step
while True:
if isinstance(next_step, int):
return next_step
next_step = a_star_reduce_distance(*next_step)
class BaseCoordinates:
x: int
y: int
distance_from_origin: int
class StringCoordinates(BaseCoordinates):
def __init__(self, x, y):
self.x = x
self.y = y
self.distance_from_origin = max(x, y) - 1
def update_distance_using_reference(self, reference: BaseCoordinates):
x_distance = self.x - reference.x
y_distance = self.y - reference.y
distance_from_reference = max(x_distance, y_distance)
# for string positions, we need subtract 1 to find edit_distance
distance_through_reference = reference.distance_from_origin + distance_from_reference - 1
if self.distance_from_origin > distance_through_reference:
self.distance_from_origin = distance_through_reference
return self
def __gt__(self, other: BaseCoordinates):
return self.x > other.x and self.y > other.y and self.distance_from_origin > other.distance_from_origin
def calculate_distance_to_last_coordinate_2(matching_positions: list[StringCoordinates]) -> int:
'''
summary: Calculate the minimum distance from (0, 0) to the end of
a list of Levenshtein coordinates.
params:
coords: list[tuple[int, int]] of (row, column) coordinates where
the characters of two strings were identical.
return: int of the Levenshtein distance.
ex: calculate_distance_to_last_position([(1, 1), (2, 5), (4, 2),
(6, 4), (7, 5), (8, 6), (9, 7)]) -> 3
{(1, 1): 0, (2, 5): 4, (4, 2): 3, (6, 4): 5, (7, 5): 6,
(8, 6): 7, (9, 7): 8}
{(2, 5): 3, (4, 2): 2, (6, 4): 4, (7, 5): 5, (8, 6): 6, (9, 7): 7}
{(4, 2): 2, (6, 4): 4, (7, 5): 5, (8, 6): 6, (9, 7): 7}
{(6, 4): 3, (7, 5): 4, (8, 6): 5, (9, 7): 6}
{(7, 5): 3, (8, 6): 4, (9, 7): 5}
{(8, 6): 3, (9, 7): 4}
{(9, 7): 3}
distances[final_position] == 3
chars: 58 + 733 = 791
'''
distances = [position for position in matching_positions]
final_position = matching_positions[-1]
current_min_distance = final_position.distance_from_origin
for position in range(len(matching_positions)):
reference = matching_positions[position]
if reference.distance_from_origin >= current_min_distance:
continue
new_distances = [
position.update_distance_using_reference(reference).distance_from_origin
for position in distances if position > reference
]
current_min_distance = min(final_position.distance_from_origin, current_min_distance)
if min(new_distances) == current_min_distance:
return current_min_distance
return final_position.distance_from_origin
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment