Skip to content

Instantly share code, notes, and snippets.

@vmarkovtsev
Last active February 18, 2021 16:42
Show Gist options
  • Save vmarkovtsev/0112f825a0aa3612b5966f275518f231 to your computer and use it in GitHub Desktop.
Save vmarkovtsev/0112f825a0aa3612b5966f275518f231 to your computer and use it in GitHub Desktop.
from tqdm.notebook import tqdm
# Grid search: score every (normalizer, string distance, matcher) combination
# on `dataset` and collect (mean accuracy, norm_name, comp_name, match_name).
results = []
for norm_name, normalize in tqdm((
    ("simple", normalize_simple),
    ("unidecode", normalize_unidecode),
    ("unidecode_metaphone", normalize_unidecode_metaphone),
)):
    for comp_name, compare in tqdm((
        ("ratio", distance_ratio),
        ("sort_ratio", distance_sort_ratio),
        ("set_ratio", distance_set_ratio),
        ("ratio_join", distance_ratio_join),
        ("max_ratio", distance_max_ratio),
    )):
        for match_name, match in tqdm((
            ("greedy", greedy_match),
            ("lap", lap_match),
        )):
            # The closure reads the current `normalize`, `compare` and `match`
            # loop variables; it is only called within this same iteration,
            # so late binding is not an issue.
            def evaluate(names1, names2):
                """Return the fraction of `names1` entries matched to the same key in `names2`.

                Args:
                    names1: Mapping from key to an iterable of name strings.
                    names2: Mapping with the same shape; every key of
                        `names1` must also be a key of `names2`.

                Returns:
                    Number of positions where the matcher's output equals the
                    true position in `names2`, divided by `len(names1)`.

                Raises:
                    KeyError: If a key of `names1` is absent from `names2`.
                """
                # Each identity becomes one normalized string: its names are
                # sorted, joined with spaces, then normalized.
                series1, series2 = (
                    [normalize(" ".join(sorted(strs))) for strs in names]
                    for names in (names1.values(), names2.values())
                )
                # NOTE(review): presumably `match` returns a NumPy array with,
                # for each item of series1, the index of its match in series2
                # (the `.sum()` below requires an array-like) - confirm.
                matches = match(series1, series2, compare)
                # Build the key -> position map once instead of calling
                # list.index() per key, which rescans the list every time.
                position2 = {key: index for index, key in enumerate(names2)}
                truth = [position2[key] for key in names1]
                return (matches == truth).sum() / len(truth)

            # NOTE(review): assumes each value of `dataset` is a
            # (names1, names2) pair, since it is unpacked into evaluate().
            results.append((
                np.mean([evaluate(*org) for org in dataset.values()]),
                norm_name,
                comp_name,
                match_name,
            ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment