Skip to content

Instantly share code, notes, and snippets.

@tcapelle
Created March 21, 2025 14:47
Show Gist options
  • Save tcapelle/16ba42005a598e89e4ab58dff7c6dad3 to your computer and use it in GitHub Desktop.
Save tcapelle/16ba42005a598e89e4ab58dff7c6dad3 to your computer and use it in GitHub Desktop.
Eval using Modal
import modal
from modal import Image
from evals import EVALUATIONS_CONFIGS
from evals.runner import run_eval
# GPU type requested for each remote eval worker (NVIDIA L4).
GPU_TYPE = "L4"
# Create Modal app
app = modal.App(name="eval_scorers")
# Container-side mount points for the persistent cache volumes below.
HF_CACHE_DIR = "/hf-cache"
WANDB_CACHE_DIR = "/wandb-cache"
# Create Modal image with required dependencies
# (git for pip VCS installs, requirements.txt deps, cache env vars,
#  plus the local `evals` package shipped into the image).
image = (Image.debian_slim()
.apt_install("git")
.pip_install_from_requirements("requirements.txt")
.env({"HF_HUB_CACHE": HF_CACHE_DIR,
"WANDB_CACHE_DIR": WANDB_CACHE_DIR})
.add_local_python_source("evals")
)
# Persistent volumes so model downloads / wandb artifacts survive across runs.
cache_volume = modal.Volume.from_name("hf-hub-cache", create_if_missing=True)
wandb_volume = modal.Volume.from_name("wandb-cache", create_if_missing=True)
@app.function(
    image=image,
    gpu=GPU_TYPE,
    secrets=[modal.Secret.from_name("wandb-api-key")],
    timeout=1800,
    volumes={HF_CACHE_DIR: cache_volume, WANDB_CACHE_DIR: wandb_volume},
)
def run_eval_modal(model_cls, dataset_url, weave_project, debug, sleep):
    """Execute a single scorer evaluation on a remote Modal GPU worker.

    Thin remote wrapper around ``run_eval``: picks the device available on
    the worker and forwards every argument unchanged.
    """
    # Deferred import: torch is installed in the Modal image, not necessarily
    # on the machine that launches the app.
    import torch

    # Use the GPU when the worker actually exposes one, else fall back to CPU.
    target_device = "cuda" if torch.cuda.is_available() else "cpu"

    run_eval(
        model_cls=model_cls,
        dataset_url=dataset_url,
        device=target_device,
        weave_project=weave_project,
        debug=debug,
        sleep=sleep,
    )
@app.local_entrypoint()
def main(weave_project: str = "scorer_evaluation", scorer: str = "all", debug: bool = False, sleep: int = 300):
    """Fan scorer evaluations out to Modal workers and print each result.

    Args:
        weave_project: Weave project that receives the evaluation results.
        scorer: Name of a single config in ``EVALUATIONS_CONFIGS``, or
            ``"all"`` to evaluate every configured scorer.
        debug: When True, the per-eval sleep is shortened to 10 seconds.
        sleep: Seconds passed through to ``run_eval`` (ignored in debug mode).

    Raises:
        ValueError: If ``scorer`` is neither ``"all"`` nor a known config name.
    """
    if scorer == "all":
        scorers_to_evaluate = EVALUATIONS_CONFIGS.values()
    else:
        # Fail fast with the valid options instead of an opaque KeyError.
        if scorer not in EVALUATIONS_CONFIGS:
            raise ValueError(
                f"Unknown scorer {scorer!r}; expected 'all' or one of: "
                f"{sorted(EVALUATIONS_CONFIGS)}"
            )
        scorers_to_evaluate = [EVALUATIONS_CONFIGS[scorer]]
    iterator = [
        (eval_config.model_cls, eval_config.dataset_url, weave_project, debug, 10 if debug else sleep)
        for eval_config in scorers_to_evaluate
    ]
    # starmap runs the evals in parallel, one Modal container per tuple.
    for result in run_eval_modal.starmap(iterator):
        print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment