Last active
January 1, 2016 04:29
-
-
Save odashi/8092484 to your computer and use it in GitHub Desktop.
BiLingual Evaluation Understudy (BLEU) between corpus and BLEU+1 per sentence
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import sys | |
import codecs | |
import math | |
from collections import defaultdict | |
# calculate BLEU score between two corpora
def BLEU(ws_hyp, ws_ref, **kwargs):
    """Return the corpus-level BLEU score of ``ws_hyp`` against ``ws_ref``.

    Args:
        ws_hyp: Sequence of hypothesis sentences, each a sequence of tokens.
            It is iterated several times, so it must not be a one-shot
            iterator.
        ws_ref: Sequence of reference sentences, paired with ``ws_hyp`` by
            position.
        **kwargs:
            maxn: Highest n-gram order to use. Must be a positive int;
                any other value is ignored and the default of 4 is used.
            dump: When exactly ``True``, print the per-order precisions,
                the brevity penalty and the final score.

    Returns:
        The BLEU score as a float. The score is 0.0 when some n-gram order
        has no match at all (including the case where no hypothesis has at
        least ``n`` tokens) or when the hypothesis corpus is empty.

    Note:
        Sentences are paired with ``zip``, so surplus sentences of the
        longer corpus are ignored for n-gram counting, but they still
        contribute to the lengths used by the brevity penalty.
    """
    # check args
    max_n = 4
    if 'maxn' in kwargs and isinstance(kwargs['maxn'], int) and kwargs['maxn'] > 0:
        max_n = kwargs['maxn']
    dump = kwargs.get('dump') is True

    # calc N-gram precision (accumulated in log space)
    log_precision_sum = 0.0
    has_zero_precision = False
    for n in range(1, max_n + 1):
        numer = 0
        denom = 0
        for hyp, ref in zip(ws_hyp, ws_ref):
            if len(hyp) < n:
                continue
            # Remaining number of times each reference n-gram may be matched
            # (clipped counts).
            possible_ngram = defaultdict(int)
            for i in range(len(ref) - (n - 1)):
                possible_ngram[tuple(ref[i:i + n])] += 1
            for i in range(len(hyp) - (n - 1)):
                key = tuple(hyp[i:i + n])
                if possible_ngram.get(key, 0) > 0:
                    possible_ngram[key] -= 1
                    numer += 1
            denom += len(hyp) - (n - 1)
        if dump:
            # denom is 0 when no hypothesis has at least n tokens.
            precision = numer / float(denom) if denom else 0.0
            print('P(n=%d) = %f (%d/%d)' % (n, precision, numer, denom))
        if numer == 0:
            # log(0) is undefined; a zero precision makes the geometric
            # mean, and therefore the whole score, zero.
            has_zero_precision = True
        else:
            # numer > 0 implies denom > 0, so both logarithms are defined.
            log_precision_sum += math.log(numer) - math.log(denom)

    # calc brevity penalty
    sumlen_hyp = sum(len(x) for x in ws_hyp)
    sumlen_ref = sum(len(x) for x in ws_ref)
    if sumlen_hyp > 0:
        bp = min(1.0, math.exp(1.0 - sumlen_ref / float(sumlen_hyp)))
    else:
        # An empty hypothesis corpus cannot be scored; avoid dividing by 0.
        bp = 0.0
    if dump:
        print('BP = %f (HYP:%d, REF:%d)' % (bp, sumlen_hyp, sumlen_ref))

    # calc final score
    if has_zero_precision:
        bleu = 0.0
    else:
        bleu = bp * math.exp(log_precision_sum / max_n)
    if dump:
        print('BLEU = %f' % bleu)
    return bleu
# calculate BLEU+1 score between two sentences
def BLEUp1(hyp, ref, **kwargs):
    """Return the smoothed sentence-level BLEU (BLEU+1) of ``hyp`` against ``ref``.

    For every n-gram order above 1, one is added to both the clipped match
    count and the total n-gram count. Unigrams are not smoothed.

    Args:
        hyp: Hypothesis sentence as a sequence of tokens.
        ref: Reference sentence as a sequence of tokens.
        **kwargs:
            maxn: Highest n-gram order to use. Must be a positive int;
                any other value is ignored and the default of 4 is used.

    Returns:
        ``0`` when the sentences share no unigram, otherwise the score as
        a float.
    """
    # check args
    order = kwargs.get('maxn')
    if not (isinstance(order, int) and order > 0):
        order = 4

    # calc N-gram precision
    log_sum = 0
    for n in range(1, order + 1):
        # Add-one smoothing applies to every order except unigrams.
        smoothing = 1 if n > 1 else 0

        # How many times each reference n-gram may still be matched.
        budget = defaultdict(int)
        for start in range(len(ref) - n + 1):
            budget[tuple(ref[start:start + n])] += 1

        window_count = len(hyp) - n + 1
        matched = smoothing
        for start in range(window_count):
            gram = tuple(hyp[start:start + n])
            if budget.get(gram, 0) > 0:
                budget[gram] -= 1
                matched += 1

        if not matched:  # only reachable for n == 1: no shared unigram
            return 0

        total = smoothing + max(0, window_count)
        log_sum += math.log(matched) - math.log(total)

    # calc brevity penalty
    length_ratio = len(ref) / float(len(hyp))
    penalty = min(1.0, math.exp(1.0 - length_ratio))

    # calc final score
    return penalty * math.exp(log_sum / order)
def main():
    """Command-line entry point: score a hypothesis file against a reference file.

    Expects exactly two command-line arguments, the hypothesis path and the
    reference path. Both files are read as UTF-8, one sentence per line,
    with tokens separated by single spaces. Prints a usage message and
    returns when the argument count is wrong; otherwise calls ``BLEU`` with
    ``dump=True``.
    """
    if len(sys.argv) != 3:
        print('USAGE: python bleu.py <file:HYP> <file:REF>')
        return

    def load_corpus(path):
        # One token list per line; surrounding whitespace is stripped first.
        with codecs.open(path, 'r', 'utf-8') as stream:
            return [line.strip().split(' ') for line in stream]

    _, hyp_path, ref_path = sys.argv
    hypotheses = load_corpus(hyp_path)
    references = load_corpus(ref_path)
    BLEU(hypotheses, references, dump=True)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment