Last active
February 18, 2021 16:42
-
-
Save vmarkovtsev/0112f825a0aa3612b5966f275518f231 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tqdm.notebook import tqdm

# Grid search over every (normalizer, string distance, matcher) combination.
# Each entry appended to `results` is a tuple
# (mean score over the dataset, norm_name, comp_name, match_name).
results = []
for norm_name, normalize in tqdm((("simple", normalize_simple),
                                  ("unidecode", normalize_unidecode),
                                  ("unidecode_metaphone", normalize_unidecode_metaphone))):
    for comp_name, compare in tqdm((("ratio", distance_ratio),
                                    ("sort_ratio", distance_sort_ratio),
                                    ("set_ratio", distance_set_ratio),
                                    ("ratio_join", distance_ratio_join),
                                    ("max_ratio", distance_max_ratio))):
        for match_name, match in tqdm((("greedy", greedy_match),
                                       ("lap", lap_match))):
            def evaluate(names1, names2):
                """Score the current normalize/compare/match combination.

                Both arguments are mappings from a key to an iterable of
                strings. The strings of every entry are sorted, joined with a
                single space and normalized; the two resulting lists are then
                handed to ``match`` together with ``compare``.

                Returns the fraction of positions at which the output of
                ``match`` equals the position, within ``names2``, of the key
                found at the same position of ``names1``.

                Raises ``KeyError`` when a key of ``names1`` is missing from
                ``names2``.
                """
                series1, series2 = (
                    [normalize(" ".join(sorted(strs))) for strs in names]
                    for names in (names1.values(), names2.values())
                )
                # NOTE(review): presumably `match` returns a NumPy array with
                # one index into series2 per element of series1 - the
                # `.sum()` below relies on an array-like comparison result.
                matches = match(series1, series2, compare)
                # Build the key -> position table once instead of scanning a
                # list with .index() for every key (quadratic in the number
                # of keys). Dict keys are unique, so positions are identical.
                position2 = {key: i for i, key in enumerate(names2)}
                truth = [position2[key] for key in names1]
                return (matches == truth).sum() / len(truth)

            # Every dataset value is unpacked into (names1, names2); the
            # per-entry scores are averaged into a single figure.
            results.append((np.mean([evaluate(*org) for org in dataset.values()]),
                            norm_name, comp_name, match_name))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment