Skip to content

Instantly share code, notes, and snippets.

@davidstap
Created April 28, 2024 12:18
Show Gist options
  • Save davidstap/787acb00fecb24559439b08cac30c5ea to your computer and use it in GitHub Desktop.
Save davidstap/787acb00fecb24559439b08cac30c5ea to your computer and use it in GitHub Desktop.
Calculate which languages a new MMTEB task adds beyond those already covered by the existing BitextMining tasks
from mteb.tasks.BitextMining import *
# Report which languages a new BitextMining task would add on top of the
# languages already covered by the other BitextMining tasks in scope.
# The script relies on the names brought into this module's namespace by the
# star import above: tasks are discovered via dir()/globals().
new_task = "BibleNLPBitextMining"

### Gather existing BitextMining tasks
# Uppercase names that must not be treated as existing tasks (presumably
# base classes / helpers exposed by the star import -- verify against mteb),
# plus the new task itself so its languages are not counted as "existing".
ignore = ["AbsTaskBitextMining", "Any", "CrosslingualTask", "TaskMetadata", "TEST_SAMPLES"]
ignore.append(new_task)
# dir() is the comprehension's outermost iterable, so it is evaluated at
# module scope and lists the module-level names. Only classes are kept:
# every selected name is instantiated below, and an uppercase non-class name
# (a constant, or IPython's `In`/`Out` in a notebook) would crash there.
existing_tasks = [
    name
    for name in dir()
    if name[0].isupper()
    and name not in ignore
    and isinstance(globals()[name], type)
]

### Find all languages currently in BitextMining
existing_languages = set()
for task in existing_tasks:
    # Each task is instantiated with no arguments to read its `languages`.
    existing_languages.update(globals()[task]().languages)

### Find new languages introduced by "BibleNLPBitextMining"
new_task_languages = globals()[new_task]().languages
# Sorted so the report order is deterministic.
new_languages = [
    lang for lang in sorted(new_task_languages) if lang not in existing_languages
]

### Report results
for idx, lang in enumerate(new_languages, start=1):
    print(f"New language {idx}: {lang}")
print(f"Total new languages: {len(new_languages)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment