lopespm · December 28, 2021 09:25
diff --git a/levenshtein_regex_recursion_memo.py b/levenshtein_regex_recursion_memo.py
 # (Variant #4 for exercise 16.2 on EPI (Elements of Programming Interviews)) (September 2018 edition)
 # The core idea is calculate the levenshtein distance, while taking into account the special cases of the regex expression
 # *, +, ? and . were taken into account for the regex expression. Expression blocks are not supported
 # This algorithm uses recursion with memoization (could be transposed to a DP solution), yielding a O(mn) time complexity, O(mn) auxiliary space for cache and O(max(m,n)) function call stack
 # (m and n are the lengths of regex and target strings respectively)
 #
 # Version using dynamic programming: https://gist.github.com/lopespm/2362a77e7bd230a4622a43709c195826

 def regex_dist(regex: str, target: str):
    def regex_dist_aux(r_i, t_i):
        if (r_i == -1 and t_i == -1):
            return 0
        if (r_i == -1):
            return t_i + 1
        if (t_i == -1):
            i, counter = r_i, 0 
            while i >= 0:
                char = regex[i]
                i -= 2 if (char == '?' or char == '*' or char == '+') else 1
                counter += 1 if (not (char == '?' or char == '*')) else 0
            return counter
        
        if (memo[r_i][t_i] is not None):
            return memo[r_i][t_i]
        
        # Regex special cases
        if (regex[r_i] == '.'):
            memo[r_i][t_i] = regex_dist_aux(r_i - 1, t_i - 1)
            return memo[r_i][t_i]
        
        if (regex[r_i] == '+' or regex[r_i] == '*' or regex[r_i] == '?'):
            if (regex[r_i-1] == target[t_i]):
                if (regex[r_i] == '?'):
                    memo[r_i][t_i] = regex_dist_aux(r_i - 2, t_i - 1)
                else:
                    memo[r_i][t_i] = min(regex_dist_aux(r_i - 2, t_i - 1), regex_dist_aux(r_i, t_i - 1))
            else:
                additional_cost = 1 if (regex[r_i] == '+') else 0
                memo[r_i][t_i] = min(regex_dist_aux(r_i - 2, t_i - 1) + 1,
                           regex_dist_aux(r_i, t_i - 1) + 1,
                           regex_dist_aux(r_i - 2, t_i) + additional_cost)
            return memo[r_i][t_i]

        # Other characters
        if (regex[r_i] == target[t_i]):
            memo[r_i][t_i] = regex_dist_aux(r_i - 1, t_i - 1)
        else:
            memo[r_i][t_i] = min(regex_dist_aux(r_i - 1, t_i - 1) + 1,
                   regex_dist_aux(r_i, t_i - 1) + 1,
                   regex_dist_aux(r_i - 1, t_i) + 1)
        return memo[r_i][t_i]
        
        
    memo = [[None] * (len(target) + 1) for _ in range(len(regex) + 1)]
    return regex_dist_aux(len(regex) - 1, len(target) -1)


 # Some examples
 print(regex_dist("OrchestraaaQA+a", "CarthorseQAAAA"))
 print(regex_dist("A+b", "AAb"))
 print(regex_dist("A+b", "AAAAAb"))
 print(regex_dist("A+b", "AAAAAb03b"))
 print(regex_dist("A..b", "AAAAAb03b"))
 print(regex_dist("q+", "A"))
 print(regex_dist("q+a?a+", "A"))
 print(regex_dist("q+a?a+A+", "A"))
 print(regex_dist("q+a?a+A+.", "A"))
 print(regex_dist("q+A", "AAAAAb03b"))
	# (Variant #4 for exercise 16.2 on EPI (Elements of Programming Interviews)) (September 2018 edition)
	# The core idea is calculate the levenshtein distance, while taking into account the special cases of the regex expression
	# *, +, ? and . were taken into account for the regex expression. Expression blocks are not supported
	# This algorithm uses recursion with memoization (could be transposed to a DP solution), yielding a O(mn) time complexity, O(mn) auxiliary space for cache and O(max(m,n)) function call stack
	# (m and n are the lengths of regex and target strings respectively)
	#
	# Version using dynamic programming: https://gist.github.com/lopespm/2362a77e7bd230a4622a43709c195826

	def regex_dist(regex: str, target: str):
	def regex_dist_aux(r_i, t_i):
	if (r_i == -1 and t_i == -1):
	return 0
	if (r_i == -1):
	return t_i + 1
	if (t_i == -1):
	i, counter = r_i, 0
	while i >= 0:
	char = regex[i]
	i -= 2 if (char == '?' or char == '*' or char == '+') else 1
	counter += 1 if (not (char == '?' or char == '*')) else 0
	return counter

	if (memo[r_i][t_i] is not None):
	return memo[r_i][t_i]

	# Regex special cases
	if (regex[r_i] == '.'):
	memo[r_i][t_i] = regex_dist_aux(r_i - 1, t_i - 1)
	return memo[r_i][t_i]

	if (regex[r_i] == '+' or regex[r_i] == '*' or regex[r_i] == '?'):
	if (regex[r_i-1] == target[t_i]):
	if (regex[r_i] == '?'):
	memo[r_i][t_i] = regex_dist_aux(r_i - 2, t_i - 1)
	else:
	memo[r_i][t_i] = min(regex_dist_aux(r_i - 2, t_i - 1), regex_dist_aux(r_i, t_i - 1))
	else:
	additional_cost = 1 if (regex[r_i] == '+') else 0
	memo[r_i][t_i] = min(regex_dist_aux(r_i - 2, t_i - 1) + 1,
	regex_dist_aux(r_i, t_i - 1) + 1,
	regex_dist_aux(r_i - 2, t_i) + additional_cost)
	return memo[r_i][t_i]

	# Other characters
	if (regex[r_i] == target[t_i]):
	memo[r_i][t_i] = regex_dist_aux(r_i - 1, t_i - 1)
	else:
	memo[r_i][t_i] = min(regex_dist_aux(r_i - 1, t_i - 1) + 1,
	regex_dist_aux(r_i, t_i - 1) + 1,
	regex_dist_aux(r_i - 1, t_i) + 1)
	return memo[r_i][t_i]


	memo = [[None] * (len(target) + 1) for _ in range(len(regex) + 1)]
	return regex_dist_aux(len(regex) - 1, len(target) -1)


	# Some examples
	print(regex_dist("OrchestraaaQA+a", "CarthorseQAAAA"))
	print(regex_dist("A+b", "AAb"))
	print(regex_dist("A+b", "AAAAAb"))
	print(regex_dist("A+b", "AAAAAb03b"))
	print(regex_dist("A..b", "AAAAAb03b"))
	print(regex_dist("q+", "A"))
	print(regex_dist("q+a?a+", "A"))
	print(regex_dist("q+a?a+A+", "A"))
	print(regex_dist("q+a?a+A+.", "A"))
	print(regex_dist("q+A", "AAAAAb03b"))
No results found