Skip to content

Instantly share code, notes, and snippets.

@niklio
Created February 7, 2017 22:26
Show Gist options
  • Save niklio/4f7578bf24f978081a332499d9a60aac to your computer and use it in GitHub Desktop.
Save niklio/4f7578bf24f978081a332499d9a60aac to your computer and use it in GitHub Desktop.
Word error rate script
from __future__ import division
import os
import numpy
import argparse
def wer(r, h):
    """Return the Levenshtein edit distance between sequences ``r`` and ``h``.

    The distance is the minimum number of single-element substitutions,
    insertions and deletions needed to turn ``r`` into ``h``. Elements are
    compared with ``==``, so the function works on strings (character
    level) as well as on lists of words.

    Args:
        r: Reference sequence; must support ``len()`` and integer indexing.
        h: Hypothesis sequence; same requirements as ``r``.

    Returns:
        The edit distance as a NumPy integer scalar (0 when the sequences
        are equal, ``len(r)`` or ``len(h)`` when the other one is empty).
    """
    rows = len(r) + 1
    cols = len(h) + 1

    # Signed 32-bit cells: an 8-bit unsigned table wraps around as soon as a
    # distance exceeds 255, which silently corrupts results for longer inputs.
    d = numpy.zeros((rows, cols), dtype=numpy.int32)

    # Turning a prefix into the empty sequence (or back) costs its length.
    d[0, :] = numpy.arange(cols)
    d[:, 0] = numpy.arange(rows)

    # ``range`` rather than ``xrange`` so the loop runs on Python 2 and 3.
    for i in range(1, rows):
        for j in range(1, cols):
            if r[i - 1] == h[j - 1]:
                # Matching elements add no cost on top of the two prefixes.
                d[i, j] = d[i - 1, j - 1]
            else:
                # Cheapest of substitution, insertion and deletion.
                d[i, j] = 1 + min(d[i - 1, j - 1], d[i, j - 1], d[i - 1, j])

    return d[rows - 1, cols - 1]
def get_contents(filename):
    """Read the whole file at ``filename`` and return its text lower-cased."""
    with open(filename) as handle:
        text = handle.read()
    return text.lower()
def read_wer(rfile, hfile):
    """Return the edit distance between the contents of two files.

    Both files are loaded through ``get_contents`` (so they are lower-cased)
    and handed to ``wer`` as whole, unsplit strings, i.e. the comparison is
    made element by element over the characters of the two texts.

    NOTE(review): despite the name, no word tokenisation happens here --
    confirm whether a word-level comparison was intended.

    Args:
        rfile: Path of the reference (ground-truth) file.
        hfile: Path of the hypothesis (prediction) file.

    Returns:
        Whatever ``wer`` returns for the two lower-cased texts.
    """
    return wer(get_contents(rfile), get_contents(hfile))
if __name__ == '__main__':
    # Command-line entry point: compare a ground-truth file with a prediction
    # file and print ``1 - distance / len(truth)``, i.e. the share of the
    # (lower-cased) truth text that did not need editing.
    parser = argparse.ArgumentParser(description="Word Error Rate")
    parser.add_argument('truth', type=str, nargs=1, help='Path to file containing ground truth')
    parser.add_argument('pred', type=str, nargs=1, help='Path to file containing word prediction')
    args = parser.parse_args()

    # nargs=1 yields one-element lists; resolve both paths against the CWD.
    truth_file = os.path.join(os.getcwd(), args.truth[0])
    pred_file = os.path.join(os.getcwd(), args.pred[0])

    # Read the truth text once and reuse it for both the distance and the
    # normalisation instead of loading the same file twice.
    truth_text = get_contents(truth_file)
    if not truth_text:
        # An empty reference would otherwise end in a bare ZeroDivisionError.
        parser.error('ground truth file is empty: %s' % truth_file)

    distance = wer(truth_text, get_contents(pred_file))

    # Parenthesised single-argument print behaves the same on Python 2 and 3;
    # true division comes from the module's ``from __future__ import division``.
    print(1 - (distance / len(truth_text)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment