Created
February 7, 2017 22:26
-
-
Save niklio/4f7578bf24f978081a332499d9a60aac to your computer and use it in GitHub Desktop.
Word error rate script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import os | |
import numpy | |
import argparse | |
def wer(r, h):
    """Return the Levenshtein edit distance between sequences ``r`` and ``h``.

    The distance is the minimum number of single-element substitutions,
    insertions and deletions needed to turn ``r`` into ``h``. Elements are
    compared with ``==``, so the inputs may be strings (compared character
    by character) or lists of tokens (compared token by token).

    Args:
        r: Reference sequence (anything supporting ``len`` and indexing).
        h: Hypothesis sequence (anything supporting ``len`` and indexing).

    Returns:
        The edit distance as a plain ``int``.
    """
    rows, cols = len(r) + 1, len(h) + 1
    # Use the platform integer type: an 8-bit table cannot hold distances
    # above 255, which corrupts results for anything but very short inputs.
    d = numpy.zeros((rows, cols), dtype=int)
    # Border cells: transforming to/from an empty prefix costs its length.
    d[0, :] = numpy.arange(cols)
    d[:, 0] = numpy.arange(rows)
    for i in range(1, rows):
        for j in range(1, cols):
            if r[i - 1] == h[j - 1]:
                # Matching elements cost nothing extra.
                d[i, j] = d[i - 1, j - 1]
            else:
                # Cheapest of substitution, insertion, deletion.
                d[i, j] = 1 + min(d[i - 1, j - 1], d[i, j - 1], d[i - 1, j])
    return int(d[-1, -1])
def get_contents(filename):
    """Read the whole file at ``filename`` and return its text lowercased."""
    with open(filename) as handle:
        text = handle.read()
    return text.lower()
def read_wer(rfile, hfile):
    """Return the edit distance between the contents of two files.

    Both files are loaded through ``get_contents`` (so they are lowercased)
    and handed to ``wer`` as whole strings, which means the comparison is
    made character by character.

    Args:
        rfile: Path to the reference file.
        hfile: Path to the hypothesis file.
    """
    reference, hypothesis = get_contents(rfile), get_contents(hfile)
    return wer(reference, hypothesis)
if __name__ == '__main__':
    # Command-line entry point: prints 1 - (edit distance / truth length),
    # where the length is that of the lowercased ground-truth contents.
    parser = argparse.ArgumentParser(description="Word Error Rate")
    parser.add_argument('truth', type=str, nargs=1, help='Path to file containing ground truth')
    parser.add_argument('pred', type=str, nargs=1, help='Path to file containing word prediction')
    args = parser.parse_args()
    # nargs=1 yields one-element lists, hence the [0] indexing.
    truth_file = os.path.join(os.getcwd(), args.truth[0])
    pred_file = os.path.join(os.getcwd(), args.pred[0])
    truth_length = len(get_contents(truth_file))
    if not truth_length:
        # An empty reference would otherwise crash with ZeroDivisionError.
        parser.error('ground truth file is empty: %s' % truth_file)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(1 - (read_wer(truth_file, pred_file) / truth_length))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment