Normalized Modified Purity in Python.
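For reference, here is a sketch of the headline measures the script reports, following the definitions in the paper linked in the header; the delta notation is adapted rather than copied from the script. With induced clusters K_i, gold classes G_j, N verbs in total, and \delta_X(S) denoting the total membership weight in X of the verbs in S:

    \mathrm{nmPU} = \frac{1}{N} \sum_{i\,:\,|K_i| > 1} \max_j \delta_{K_i}(K_i \cap G_j)
    \qquad
    \mathrm{niPU} = \frac{1}{N} \sum_j \max_i \delta_{G_j}(G_j \cap K_i)

    F_\beta = \frac{(1 + \beta^2) \cdot \mathrm{nmPU} \cdot \mathrm{niPU}}{\beta^2 \cdot \mathrm{nmPU} + \mathrm{niPU}}

Modified purity differs from plain purity only in skipping singleton clusters, which penalizes the trivial one-verb-per-cluster solution; inverse purity keeps all gold classes.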
#!/usr/bin/env python
# This script computes the normalized modified purity and inverse purity
# as described in this paper: https://aclweb.org/anthology/P14-1097.
# In fact, this program is currently quite a rough translation of
# the evaluation-verb-classes.perl script provided by Daisuke Kawahara.
import argparse
import re
import sys
from collections import defaultdict
from math import log
# Patterns for parsing the input files; raw strings avoid the
# invalid-escape warnings that \d triggers in recent Python versions.
CLUSTER = re.compile(r'^Class (\d+): (.+)')
VERB = re.compile(r'^(.+)-([.\d]+)$')
TAB = re.compile(r'\t+')
VALUE = re.compile(r':\d+')
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--multi', action='store_true')
# Without required=True, a missing --gold would crash later with a TypeError.
parser.add_argument('--gold', required=True, type=argparse.FileType('r', encoding='UTF-8'))
parser.add_argument('resource', type=argparse.FileType('r', encoding='UTF-8'))
args = parser.parse_args()
classes = defaultdict(lambda: defaultdict(int))   # gold class -> verb -> weight
clusters = defaultdict(lambda: defaultdict(int))  # cluster    -> verb -> weight
verbs_in_class = defaultdict(int)
verbs_in_cluster = defaultdict(int)
def count_cluster_num(verb_hr):
    # Size of a cluster: the number of verbs, or in the --multi case,
    # the total of their fractional membership weights.
    if not args.multi:
        return len(verb_hr)
    return sum(verb_hr.values())
def evaluate_one_cluster(verb_hr, classes_hr):
    # Return the largest (weighted) overlap of this cluster with any gold class.
    max_count = -1
    for klass in classes_hr:
        count = 0.
        for verb in verb_hr:
            if verb in classes_hr[klass]:
                if args.multi:
                    count += verb_hr[verb]
                else:
                    count += 1
        if max_count < count:
            max_count = count
    return max_count
# F_beta score; returns 0 when both inputs are zero to avoid division by zero.
def calc_f(p, r, beta=1.):
    if p == 0. and r == 0.:
        return 0.
    return (1 + pow(beta, 2)) * p * r / (pow(beta, 2) * p + r)
def entropy(cluster_hr):
    # Shannon entropy of the distribution of verbs over clusters.
    N = 0.
    for cluster in cluster_hr:
        N += count_cluster_num(cluster_hr[cluster])
    score = 0.
    for cluster in cluster_hr:
        cluster_count = count_cluster_num(cluster_hr[cluster])
        score += -1 * cluster_count / N * log(cluster_count / N)
    return score
def conditional_entropy(cluster_hr, class_hr):
    # Conditional entropy H(class | cluster).
    N = 0
    for cluster in cluster_hr:
        N += count_cluster_num(cluster_hr[cluster])
    score = 0
    for cluster in cluster_hr:
        overlap_sum_given_cluster = 0
        for klass in class_hr:
            overlap_sum_given_cluster += count_hash_overlap(cluster_hr[cluster], class_hr[klass])
        for klass in class_hr:
            overlap_count = count_hash_overlap(cluster_hr[cluster], class_hr[klass])
            if overlap_count > 0:
                score += -1 * overlap_count / N * log(overlap_count / overlap_sum_given_cluster)
    return score
def mutual_information(cluster_hr, class_hr):
    N = 0
    for cluster in cluster_hr:
        N += count_cluster_num(cluster_hr[cluster])
    score = 0
    for cluster in cluster_hr:
        cluster_count = count_cluster_num(cluster_hr[cluster])
        for klass in class_hr:
            class_count = count_cluster_num(class_hr[klass])
            overlap_count = count_hash_overlap(cluster_hr[cluster], class_hr[klass])
            if overlap_count > 0:
                score += overlap_count / N * log(N * overlap_count / cluster_count / class_count)
    return score
def count_hash_overlap(hash1_hr, hash2_hr):
    # (Weighted) size of the intersection of two verb dictionaries.
    count = 0
    for key in hash1_hr:
        if key in hash2_hr:
            if args.multi:
                count += hash2_hr[key]
            else:
                count += 1
    return count
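# The gold standard is read as tab-separated lines: a class label, a second
# field that is ignored here, and a space-separated list of verbs. (This
# format is inferred from the parsing below.)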
for line in args.gold:
    klass, _, verbs_str = TAB.split(line.rstrip(), 2)
    for verb in verbs_str.split(' '):
        classes[klass][verb] += 1
        verbs_in_class[verb] += 1
if args.multi:
    # Normalize each verb's class memberships so that they sum to one.
    for klass in classes:
        for verb in classes[klass]:
            classes[klass][verb] /= verbs_in_class[verb]
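# The clustering is read from lines of the form 'Class N: verb1 verb2-0.5 ...',
# where an optional '-weight' suffix carries a membership weight and any
# ':count' suffix is stripped. (Also inferred from the patterns above.)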
for line in args.resource:
    match = CLUSTER.match(line)
    if match:
        cluster, verbs_str = match.group(1), match.group(2)
        for verb in verbs_str.rstrip().split(' '):
            value = None
            match = VERB.match(verb)
            if match:
                verb, value = match.group(1), match.group(2)
                value = float(value)
            elif '-' in verb:
                print('Hyphen found in the verb.', file=sys.stderr)
                sys.exit(1)
            else:
                value = 1
            verb = VALUE.sub('', verb)
            clusters[cluster][verb] += value
            verbs_in_cluster[verb] += value
if args.multi:
    # Normalize each verb's cluster memberships so that they sum to one.
    for cluster in clusters:
        for verb in clusters[cluster]:
            clusters[cluster][verb] /= verbs_in_cluster[verb]
cluster_num = len(clusters)
print('# of Clusters: {0}'.format(cluster_num))
cluster_entropy = entropy(clusters)
class_entropy = entropy(classes)
print('clu_e = %.5f' % cluster_entropy)
print('cla_e = %.5f' % class_entropy)
mi = mutual_information(clusters, classes)
nmi = 2 * mi / (cluster_entropy + class_entropy)
print('MI = %.5f' % mi)
print('NMI = %.5f' % nmi)
# Homogeneity, completeness, and their harmonic mean, the V-measure.
homogeneity = 1 if class_entropy == 0 else 1 - conditional_entropy(clusters, classes) / class_entropy
completeness = 1 if cluster_entropy == 0 else 1 - conditional_entropy(classes, clusters) / cluster_entropy
print('h = %.5f' % homogeneity)
print('c = %.5f' % completeness)
print('V1 = %.5f' % calc_f(homogeneity, completeness))
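# Purity-style evaluation: each cluster is credited with its best-matching
# gold class; the same is done per gold class for the inverse purity below.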
print('Cluster status:')
correct_sum = 0
modified_correct_sum = 0
all_sum = 0
for cluster in sorted(clusters):
    max_count = evaluate_one_cluster(clusters[cluster], classes)
    verb_num = count_cluster_num(clusters[cluster])
    correct_sum += max_count
    # Modified purity discards clusters that consist of a single verb.
    if len(clusters[cluster]) > 1:
        modified_correct_sum += max_count
    all_sum += verb_num
    print('\t%s %.5f (%.1f / %.1f)' % (cluster, max_count / verb_num, max_count, verb_num))
purity = correct_sum / all_sum
modified_purity = modified_correct_sum / all_sum
print('purity = %.5f (%.1f / %.1f)' % (purity, correct_sum, all_sum))
print('modified purity = %.5f (%.1f / %.1f)' % (modified_purity, modified_correct_sum, all_sum))
print('Class status:')
correct_sum = 0
all_sum = 0
for klass in classes:
    max_count = evaluate_one_cluster(classes[klass], clusters)
    verb_num = count_cluster_num(classes[klass])
    correct_sum += max_count
    all_sum += verb_num
    print('\t%s %.5f (%.1f / %.1f)' % (klass, max_count / verb_num, max_count, verb_num))
inverse_purity = correct_sum / all_sum
print('inverse purity = %.5f (%.1f / %.1f)' % (inverse_purity, correct_sum, all_sum))
print('F1 (purity&inverse_purity) = %.5f' % calc_f(purity, inverse_purity))
print('F1 (modified_purity&inverse_purity) = %.5f' % calc_f(modified_purity, inverse_purity))
print('F0.5 (purity&inverse_purity) = %.5f' % calc_f(purity, inverse_purity, 0.5))
print('F0.5 (modified_purity&inverse_purity) = %.5f' % calc_f(modified_purity, inverse_purity, 0.5))
print('#%d %.5f %.5f %.5f %.5f %.5f' % (
    cluster_num, purity * 100, modified_purity * 100, inverse_purity * 100,
    calc_f(modified_purity, inverse_purity) * 100,
    calc_f(modified_purity, inverse_purity, 0.5) * 100))
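A minimal usage sketch, assuming the script is saved as nmpu.py; the file names and contents below are illustrative, not from the original, and follow the formats inferred in the comments above.

gold.txt (class label, an ignored field, space-separated verbs, tab-separated):

    motion	3	run walk fly
    cognition	2	think know

resource.txt (one 'Class N: ...' line per induced cluster):

    Class 1: run walk
    Class 2: think know fly

    $ python3 nmpu.py --gold gold.txt resource.txt

Pass -m/--multi when verbs carry fractional membership weights (the verb-0.5 suffix form); the script then normalizes each verb's weights across classes and clusters before scoring.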