Created
April 28, 2024 12:18
-
-
Save davidstap/787acb00fecb24559439b08cac30c5ea to your computer and use it in GitHub Desktop.
Calculate new languages for new MMTEB task
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report which languages a new MMTEB BitextMining task would add on top of
# the languages already covered by the existing BitextMining tasks.
#
# NOTE: task discovery below scans this module's namespace for every name that
# starts with an uppercase letter. Do not add capitalised module-level names
# (constants, imported classes) to this script without also listing them in
# `ignore`, or they will be looked up and called as if they were tasks.
from mteb.tasks.BitextMining import *

new_task = "BibleNLPBitextMining"

### Gather existing BitextMining tasks
# Names excluded from the scan; presumably non-task helpers exposed by the
# star import (verify against the mteb version in use).
ignore = ["AbsTaskBitextMining", "Any", "CrosslingualTask", "TaskMetadata", "TEST_SAMPLES"]
# The new task itself must not count towards the "existing" languages.
ignore.append(new_task)
existing_tasks = [
    task for task in dir() if task[0].isupper() and task not in ignore
]

### Find all languages currently in BitextMining
existing_languages = set()
for task in existing_tasks:
    # Resolve the name in the module namespace, call it with no arguments and
    # collect the `languages` attribute of the result.
    existing_languages.update(globals()[task]().languages)

### Find new languages introduced by "BibleNLPBitextMining"
new_task_languages = globals()[new_task]().languages
# Sorted so the report below is printed in a stable order.
new_languages = [
    lang for lang in sorted(new_task_languages) if lang not in existing_languages
]

### Report results
for idx, lang in enumerate(new_languages, start=1):
    print(f"New language {idx}: {lang}")
print(f"Total new languages: {len(new_languages)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment