Created
October 31, 2019 10:43
-
-
Save macleginn/97de7b638140251a2fa3912bfdb349c0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def strip_direction(x):
    """Return the part of *x* that precedes its first underscore.

    A string without an underscore is returned unchanged.
    """
    return x.partition('_')[0]


# Tallies filled in by the per-sentence loop below for every node listed
# under the alignment key 'X' (presumably nodes that exist only on the `ko`
# side -- confirm against the alignment format): one counter keyed by the
# node's 'pos' value, one keyed by its 'relation' value.
addition_stats_pos = Counter()
addition_stats_rel = Counter()

# Confusion tables, each mapping a label on the `en` side to a Counter of
# the labels observed for the aligned material on the `ko` side.
confusion_dict_pos = {}    # en POS tag -> Counter of ko POS tags
confusion_dict_paths = {}  # en edge label -> Counter of ko path descriptions
# Walk the corpus sentence by sentence and accumulate, per sentence:
#   * addition_stats_pos / addition_stats_rel -- tallies for the nodes listed
#     under the alignment key 'X';
#   * confusion_dict_pos   -- en POS tag -> Counter of aligned ko POS tags;
#   * confusion_dict_paths -- en edge label -> Counter of ko path descriptions.
# `en`, `ko` and `alignments` are all indexed by the same sentence number;
# plain indexing is kept because nothing here shows what container types
# they are.
for i in range(len(en)):
    en_n, en_g = conll2graph(en[i])
    ko_n, ko_g = conll2graph(ko[i])
    alignment = alignments[i]

    # Simplify the alignment to a set of one-to-one pairs
    one_to_one = []
    for k, v in alignment.items():
        if k == 'X':
            # Tally POS tag and relation of every node listed under 'X',
            # ignoring technical nodes (IDs containing a dot).
            for idx in v:
                if '.' in idx:
                    continue
                new_node = ko_n[idx]
                addition_stats_pos[new_node['pos']] += 1
                addition_stats_rel[new_node['relation']] += 1
            # NOTE(review): there is no `continue` here, so the 'X' entry is
            # also appended to one_to_one below -- confirm this is intended.
        head = k
        # str() turns a missing counterpart into the literal string 'None',
        # which the two loops below test for.
        tail = str(highest_or_none(v, ko_g))
        one_to_one.append((head, tail))

    # POS confusion dict
    for head, tail in one_to_one:
        # Skip technical additional nodes
        if '.' in head:
            continue
        try:
            en_pos = en_n[head]['pos']
        except KeyError:
            # en_n has no usable entry for this head: report the sentence
            # index and the raw sentence, then move on to the next pair.
            print(i, en[i])
            continue
        ko_pos = 'None' if tail == 'None' else ko_n[tail]['pos']
        confusion_dict_pos.setdefault(en_pos, Counter())[ko_pos] += 1

    # Path confusion dict
    for (en_head, ko_head), (en_tail, ko_tail) in combs(one_to_one, 2):
        # Skip technical additional nodes. The endpoints of the current pair
        # are tested; the earlier code tested `head`, a leftover from the POS
        # loop above, so the check did not depend on the pair at all.
        if '.' in en_head or '.' in en_tail:
            continue
        en_path_arr = get_path(en_head, en_tail, en_g)
        # Pairs whose en path has more than one element are not counted.
        if len(en_path_arr) > 1:
            continue
        en_path = strip_direction(en_path_arr[0])
        # The unaligned cases must be tested before the equality test: two
        # unaligned endpoints are both 'None' and would otherwise compare
        # equal and be miscounted as 'Nodes collapsed'.
        if ko_head == 'None' and ko_tail == 'None':
            ko_path = 'Both endpoints unaligned'
        elif ko_head == 'None' or ko_tail == 'None':
            ko_path = 'One endpoint unaligned'
        elif ko_head == ko_tail:
            ko_path = 'Nodes collapsed'
        else:
            ko_path_arr = get_path(ko_head, ko_tail, ko_g)
            ko_path = '->'.join(map(strip_direction, ko_path_arr))
        confusion_dict_paths.setdefault(en_path, Counter())[ko_path] += 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment