Last active
October 4, 2022 07:47
-
-
Save StSav012/914496817eb88be7a0e479a21f927a4e to your computer and use it in GitHub Desktop.
Find the longest common sequences of two sequences given
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Iterable, Iterator, Sequence, TypeVar | |
_T = TypeVar('_T') | |
def arg_max(x: Iterable[_T]) -> int | None: | |
""" find the index of the largest item in the series """ | |
am: int | None = None | |
max_x: _T | None = None | |
for i, x_i in enumerate(x): | |
if am is None or x_i > max_x: | |
max_x = x_i | |
am = i | |
return am | |
def lcs(a: Sequence[_T], b: Sequence[_T]) -> list[tuple[int, int]]: | |
# get the coordinates of equal items of the sequences | |
cs_indices: list[tuple[int, int]] = [] | |
i_a: int = 0 | |
while i_a < len(a): | |
i_b: int = 0 | |
while i_a < len(a) and i_b < len(b): | |
if a[i_a] == b[i_b]: | |
while i_a < len(a) and i_b < len(b) and a[i_a] == b[i_b]: | |
cs_indices.append((i_a, i_b)) | |
i_a += 1 | |
i_b += 1 | |
i_a -= 1 | |
i_b -= 1 | |
i_b += 1 | |
i_a += 1 | |
# if no equal items, return [] | |
if not cs_indices: | |
return [] | |
def all_next_cs(_cs_indices: list[tuple[int, int]], | |
start: list[tuple[int, int]] | None = None) -> Iterator[list[tuple[int, int]]]: | |
""" | |
get all possible coordinates of the equal items, | |
which are to the right of the items of the `start` coordinates | |
""" | |
if start is None: | |
start = [] | |
j: int = 0 | |
while j < len(_cs_indices): | |
while (len(start) > 0 | |
and j < len(_cs_indices) | |
and (_cs_indices[j][0] <= start[-1][0] or _cs_indices[j][1] <= start[-1][1])): | |
j += 1 | |
if j < len(_cs_indices): | |
yield from all_next_cs(_cs_indices, start + [_cs_indices[j]]) | |
j += 1 | |
yield start | |
# get all possible series of equal items, which are in sequential order | |
cs_indices_series: list[list[tuple[int, int]]] = [] | |
cs_index: tuple[int, int] | |
for cs_index in cs_indices: | |
cs_indices_series.extend(list(all_next_cs(cs_indices, start=[cs_index]))) | |
# determine the most optimal sequence of identical items | |
# find the longest series (or several) | |
max_length_indices_series: list[list[tuple[int, int]]] = [cs_indices_series[0]] | |
for cs_indices in cs_indices_series: | |
if len(cs_indices) > len(max_length_indices_series[0]): | |
max_length_indices_series = [cs_indices] | |
elif len(cs_indices) == len(max_length_indices_series[0]): | |
max_length_indices_series.append(cs_indices) | |
# if there is only one series of the max length, return it | |
if len(max_length_indices_series) == 1: | |
return max_length_indices_series[0] | |
def sequential_indices_lengths_sum(_cs_indices: list[tuple[int, int]]) -> int: | |
""" calculate the total length of the sequential coordinates of the equal items """ | |
ss = 0 | |
j: int | |
for j in range(1, len(_cs_indices)): | |
ss += (int(_cs_indices[j][0] == _cs_indices[j - 1][0] + 1) | |
+ | |
int(_cs_indices[j][1] == _cs_indices[j - 1][1] + 1)) | |
return ss | |
# calculate the total length of the sequential coordinates of the equal items | |
# for all the series of the max length | |
sequential_indices_lengths_sums: Iterable[int] = ( | |
sequential_indices_lengths_sum(cs_indices) | |
for cs_indices in max_length_indices_series | |
) | |
# return the series which has the longest total length of the sequential coordinates of the equal items | |
return max_length_indices_series[arg_max(sequential_indices_lengths_sums)] | |
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 | |
lcs([ 0, 1, 0, 2, 4, 3, 1, 2, 1, 4, 5, 3, 1, 2], | |
# 0 1 2 3 4 5 6 7 | |
[4, 0, 1, 4, 5, 3, 1, 2]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment