Skip to content

Instantly share code, notes, and snippets.

@anna-hope
Last active December 15, 2016 02:55
Show Gist options
  • Select an option

  • Save anna-hope/d85d918abda616270ceca43738fb4e89 to your computer and use it in GitHub Desktop.

Select an option

Save anna-hope/d85d918abda616270ceca43738fb4e89 to your computer and use it in GitHub Desktop.
Word Error Rate
# Anton Melnikov
# used https://martin-thoma.com/word-error-rate-calculation/
# and http://progfruits.blogspot.com/2014/02/word-error-rate-wer-and-word.html
# as reference
import numpy as np
def get_wer(reference, hypothesis):
    """Compute the word error rate (WER) of *hypothesis* against *reference*.

    The two sequences are aligned with the Levenshtein (edit distance)
    dynamic programme, and the cheapest alignment is then traced back to
    count substitutions, insertions and deletions.

    Args:
        reference: Sequence of reference tokens. Must support ``len()`` and
            integer indexing; tokens are compared with ``==``.
        hypothesis: Sequence of hypothesis tokens, same requirements.

    Returns:
        A ``(wer, editops)`` tuple. ``wer`` is
        ``(substitutions + deletions + insertions) / len(reference)``.
        ``editops`` is a list of ``(op, i, j)`` tuples in left-to-right
        order, where ``op`` is ``'replace'``, ``'insert'`` or ``'delete'``
        and ``i`` / ``j`` are the 1-based positions reached in the reference
        and hypothesis (the DP matrix coordinates) when the operation is
        applied. An index of 0 means the operation happens before the first
        token of that sequence.

    Raises:
        ZeroDivisionError: If ``reference`` is empty, because the WER is
            normalised by the reference length.
    """
    ref_len = len(reference)
    hyp_len = len(hypothesis)
    if ref_len == 0:
        # Same exception type the bare division used to raise, but raised
        # up front and with an explanation.
        raise ZeroDivisionError(
            'word error rate is undefined for an empty reference')

    # operation values (we'll need these for backtracking)
    correct_op = 0
    substitution_op = 1
    insertion_op = 2
    deletion_op = 3

    # d[i, j] is the edit distance between reference[:i] and hypothesis[:j];
    # backtrace[i, j] remembers which operation produced that distance.
    d = np.zeros((ref_len + 1, hyp_len + 1), dtype='int32')
    backtrace = np.zeros_like(d)

    # https://en.wikipedia.org/wiki/Levenshtein_distance
    # Borders: turning an empty prefix into j tokens takes j insertions,
    # turning i tokens into an empty prefix takes i deletions.
    d[0, :] = np.arange(hyp_len + 1)
    d[:, 0] = np.arange(ref_len + 1)
    # Record the border operations too, so the backtrace can walk all the
    # way to (0, 0) instead of stopping at the first border it touches.
    backtrace[0, 1:] = insertion_op
    backtrace[1:, 0] = deletion_op

    for i in range(1, ref_len + 1):
        for j in range(1, hyp_len + 1):
            if reference[i - 1] == hypothesis[j - 1]:
                d[i, j] = d[i - 1, j - 1]
                backtrace[i, j] = correct_op
                continue

            substitution = d[i - 1, j - 1] + 1
            insertion = d[i, j - 1] + 1
            deletion = d[i - 1, j] + 1
            min_edit = min(substitution, insertion, deletion)

            # Ties are resolved in the order substitution, insertion,
            # deletion.
            if min_edit == substitution:
                op = substitution_op
            elif min_edit == insertion:
                op = insertion_op
            else:
                op = deletion_op

            d[i, j] = min_edit
            backtrace[i, j] = op

    corrects = 0
    substitutions = 0
    insertions = 0
    deletions = 0
    editops = []

    i = ref_len
    j = hyp_len
    # Go through the operations to backtrace the best path. The loop must
    # continue until BOTH indices reach 0; otherwise insertions/deletions at
    # the very start of the sequences are silently dropped.
    while i > 0 or j > 0:
        op = backtrace[i, j]
        if op == correct_op:
            corrects += 1
            i -= 1
            j -= 1
        elif op == substitution_op:
            substitutions += 1
            editops.append(('replace', i, j))
            i -= 1
            j -= 1
        elif op == insertion_op:
            insertions += 1
            editops.append(('insert', i, j))
            j -= 1
        else:
            # deletion
            deletions += 1
            editops.append(('delete', i, j))
            i -= 1

    # Every reference token is consumed by exactly one of these operations.
    n = substitutions + deletions + corrects
    assert n == ref_len

    wer = (substitutions + deletions + insertions) / n
    # reverse the editops because we built them by backtracking
    return wer, editops[::-1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment