Skip to content

Instantly share code, notes, and snippets.

@xiabingquan
Last active December 12, 2022 06:10
Show Gist options
  • Save xiabingquan/18c008cc8642eeeccd7ba0c750af4d3f to your computer and use it in GitHub Desktop.
Save xiabingquan/18c008cc8642eeeccd7ba0c750af4d3f to your computer and use it in GitHub Desktop.
Visualize errors of Edit Distance (Insertion, Deletion and Replacement)
from argparse import ArgumentParser
import editdistance
from rich.text import Text
from rich.console import Console
def edit_dist_dp(gt, hyp):
"""
A Dynamic Programming based Python program for edit distance problem
References: https://www.geeksforgeeks.org/edit-distance-dp-5/
Args:
gt:
hyp:
Returns:
"""
m, n = len(gt), len(hyp)
# Create a table to store results of subproblems
dp = [[0 for x in range(n + 1)] for x in range(m + 1)]
ops = [['' for x in range(n + 1)] for x in range(m + 1)]
# Fill d[][] in bottom up manner
for i in range(m + 1):
for j in range(n + 1):
# If first string is empty, only option is to
# insert all characters of second string
if i == 0:
dp[i][j] = j # Min. operations = j
ops[i][j] = "del"
# If second string is empty, only option is to
# remove all characters of second string
elif j == 0:
dp[i][j] = i # Min. operations = i
ops[i][j] = "ins"
# If last characters are same, ignore last char
# and recur for remaining string
elif gt[i - 1] == hyp[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
ops[i][j] = "keep"
# If last character are different, consider all
# possibilities and find minimum
else:
x = min(dp[i][j - 1], dp[i - 1][j], dp[i - 1][j - 1])
dp[i][j] = 1 + x
if dp[i][j - 1] == x: # delete
ops[i][j] = "del"
if dp[i - 1][j] == x: # insert
ops[i][j] = "ins"
if dp[i - 1][j - 1] == x: # replace
ops[i][j] = "rep"
return dp[m][n], ops
def analyze(gt, hyp, ops, d, text=None):
errs = {
"rep": 0,
"ins": 0,
"del": 0,
# "correct": 0
}
i, j = len(gt), len(hyp)
gt = [''] + gt
hyp = [''] + hyp
s = []
while i > 0 or j > 0: # exit: i == 0 and j == 0
if ops[i][j] == "keep":
s.append((hyp[j], "underline"))
i -= 1
j -= 1
# errs["correct"] += 1
elif ops[i][j] == "rep":
s.append((f"【{hyp[j]}->{gt[i]}】", ''))
i -= 1
j -= 1
errs["rep"] += 1
elif ops[i][j] == "ins":
s.append((f"\\{gt[i]}/", ''))
i -= 1
errs["ins"] += 1
elif ops[i][j] == "del":
s.append((hyp[j], "strike"))
j -= 1
errs["del"] += 1
else:
raise ValueError
assert sum(errs.values()) == d
if text is None:
text = Text()
text.append(f"gt: {' '.join(gt)}\n")
text.append(f"hyp: {' '.join(hyp)}\n")
text.append("comparison: ")
for t in s[::-1]:
text.append(t[0], style=t[1])
text.append(' ')
text.append('\n')
text.append(f"err/num: {d}/{len(gt) - 1} -> ")
text.append(', '.join([f"{k}: {v}" for k, v in errs.items()]))
return text, errs
if __name__ == "__main__":
argparser = ArgumentParser()
argparser.add_argument("--ground_truth", "--gt", type=str, required=True)
argparser.add_argument("--hypothesis", "--hyp", type=str, required=True)
argparser.add_argument("--delimeter", "-d", type=str, default=' ')
args = argparser.parse_args()
gt, hyp = args.ground_truth, args.hypothesis
gt, hyp = gt.split(args.delimeter), hyp.split(args.delimeter)
d, ops = edit_dist_dp(gt, hyp)
assert d == editdistance.eval(gt, hyp)
with Console() as console:
text, err = analyze(gt, hyp, ops, d)
console.print(text)
@xiabingquan
Copy link
Author

xiabingquan commented Dec 12, 2022

An example:
python3 ./visualize_edit_distance.py --gt "WE'RE GOING TO START PUTTING AN ENTIRE LAYER OF DIGITAL INFORMATION ON THE REAL WORLD" --hyp "WE CAN START POINTING ENTIRE COLLECTIVE DIGITAL INFORMATION ON THE REAL WORLD"
Outputs:

gt: gt: WE'RE GOING TO START PUTTING AN ENTIRE LAYER OF DIGITAL INFORMATION ON THE REAL WORLD
hyp: WE CAN START POINTING ENTIRE COLLECTIVE DIGITAL INFORMATION ON THE REAL WORLD
comparison: \WE'RE/ 【WE->GOING】 【CAN->TO】 START \PUTTING/ 【POINTING->AN】 ENTIRE \LAYER/ 【COLLECTIVE->OF】 DIGITAL INFORMATION ON THE REAL WORLD
err/num: 7/15 -> rep: 4, ins: 3, del: 0

Test it in shell to see full stylizations.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment