tcapelle · March 21, 2025 14:47
diff --git a/eval_with_modal.py b/eval_with_modal.py
 import modal
 from modal import Image

 from evals import EVALUATIONS_CONFIGS
 from evals.runner import run_eval

 # GPU type requested for the evaluation function (passed as ``gpu=``).
 GPU_TYPE = "L4"

 # Paths used both as cache environment variables in the image and as the
 # keys of the ``volumes=`` mapping on the evaluation function.
 HF_CACHE_DIR = "/hf-cache"
 WANDB_CACHE_DIR = "/wandb-cache"

 # Modal application that owns the function and entrypoint defined below.
 app = modal.App(name="eval_scorers")

 # Container image: Debian slim with git, the packages listed in
 # requirements.txt, the cache environment variables, and the local
 # ``evals`` package source.
 image = (
     Image.debian_slim()
     .apt_install("git")
     .pip_install_from_requirements("requirements.txt")
     .env(
         {
             "HF_HUB_CACHE": HF_CACHE_DIR,
             "WANDB_CACHE_DIR": WANDB_CACHE_DIR,
         }
     )
     .add_local_python_source("evals")
 )

 # Named volumes (created when missing) attached at the cache paths above.
 cache_volume = modal.Volume.from_name("hf-hub-cache", create_if_missing=True)
 wandb_volume = modal.Volume.from_name("wandb-cache", create_if_missing=True)

 @app.function(
     image=image,
     gpu=GPU_TYPE,
     secrets=[modal.Secret.from_name("wandb-api-key")],
     timeout=1800,
     volumes={HF_CACHE_DIR: cache_volume, WANDB_CACHE_DIR: wandb_volume},
 )
 def run_eval_modal(model_cls, dataset_url, weave_project, debug, sleep):
     """Run a single evaluation through ``run_eval`` as a Modal function.

     Every argument is forwarded to ``run_eval`` under the same keyword
     name. The ``device`` keyword is chosen here: ``"cuda"`` when
     ``torch.cuda.is_available()`` is true, otherwise ``"cpu"``.

     Returns:
         None. The value returned by ``run_eval`` is not propagated, so
         callers iterating over results receive ``None`` for each call.
     """
     # Function-scope import: torch is only needed where this body executes.
     import torch

     if torch.cuda.is_available():
         target_device = "cuda"
     else:
         target_device = "cpu"

     run_eval(
         model_cls=model_cls,
         dataset_url=dataset_url,
         device=target_device,
         weave_project=weave_project,
         debug=debug,
         sleep=sleep,
     )

 @app.local_entrypoint()
 def main(
     weave_project: str = "scorer_evaluation",
     scorer: str = "all",
     debug: bool = False,
     sleep: int = 300,
 ):
     """Dispatch the selected scorer evaluations to ``run_eval_modal``.

     Args:
         weave_project: Forwarded unchanged to every evaluation call.
         scorer: ``"all"`` to run every entry of ``EVALUATIONS_CONFIGS``,
             or one of its keys to run only that entry.
         debug: Forwarded unchanged; when true the sleep value sent to the
             evaluations is 10 regardless of ``sleep``.
         sleep: Sleep value sent to the evaluations when ``debug`` is false.

     Raises:
         KeyError: If ``scorer`` is neither ``"all"`` nor a key of
             ``EVALUATIONS_CONFIGS``; the message lists the valid names.
     """
     if scorer == "all":
         selected = list(EVALUATIONS_CONFIGS.values())
     else:
         try:
             selected = [EVALUATIONS_CONFIGS[scorer]]
         except KeyError:
             # Replace the bare lookup failure with a message that tells the
             # user which names are accepted.
             known = ", ".join(sorted(map(str, EVALUATIONS_CONFIGS)))
             raise KeyError(
                 f"Unknown scorer {scorer!r}; expected 'all' or one of: {known}"
             ) from None

     # The sleep value is the same for every call, so compute it once.
     effective_sleep = 10 if debug else sleep
     call_args = [
         (cfg.model_cls, cfg.dataset_url, weave_project, debug, effective_sleep)
         for cfg in selected
     ]

     # One call per argument tuple; print whatever each call returns.
     for result in run_eval_modal.starmap(call_args):
         print(result)
	import modal
	from modal import Image

	from evals import EVALUATIONS_CONFIGS
	from evals.runner import run_eval

	# GPU type requested for the evaluation function (passed as ``gpu=``).
	GPU_TYPE = "L4"

	# Paths used both as cache environment variables in the image and as the
	# keys of the ``volumes=`` mapping on the evaluation function.
	HF_CACHE_DIR = "/hf-cache"
	WANDB_CACHE_DIR = "/wandb-cache"

	# Modal application that owns the function and entrypoint defined below.
	app = modal.App(name="eval_scorers")

	# Container image: Debian slim with git, the packages listed in
	# requirements.txt, the cache environment variables, and the local
	# ``evals`` package source.
	image = (
	    Image.debian_slim()
	    .apt_install("git")
	    .pip_install_from_requirements("requirements.txt")
	    .env(
	        {
	            "HF_HUB_CACHE": HF_CACHE_DIR,
	            "WANDB_CACHE_DIR": WANDB_CACHE_DIR,
	        }
	    )
	    .add_local_python_source("evals")
	)

	# Named volumes (created when missing) attached at the cache paths above.
	cache_volume = modal.Volume.from_name("hf-hub-cache", create_if_missing=True)
	wandb_volume = modal.Volume.from_name("wandb-cache", create_if_missing=True)

	@app.function(
	    image=image,
	    gpu=GPU_TYPE,
	    secrets=[modal.Secret.from_name("wandb-api-key")],
	    timeout=1800,
	    volumes={HF_CACHE_DIR: cache_volume, WANDB_CACHE_DIR: wandb_volume},
	)
	def run_eval_modal(model_cls, dataset_url, weave_project, debug, sleep):
	    """Run a single evaluation through ``run_eval`` as a Modal function.

	    Every argument is forwarded to ``run_eval`` under the same keyword
	    name. The ``device`` keyword is chosen here: ``"cuda"`` when
	    ``torch.cuda.is_available()`` is true, otherwise ``"cpu"``.

	    Returns:
	        None. The value returned by ``run_eval`` is not propagated, so
	        callers iterating over results receive ``None`` for each call.
	    """
	    # Function-scope import: torch is only needed where this body executes.
	    import torch

	    if torch.cuda.is_available():
	        target_device = "cuda"
	    else:
	        target_device = "cpu"

	    run_eval(
	        model_cls=model_cls,
	        dataset_url=dataset_url,
	        device=target_device,
	        weave_project=weave_project,
	        debug=debug,
	        sleep=sleep,
	    )

	@app.local_entrypoint()
	def main(
	    weave_project: str = "scorer_evaluation",
	    scorer: str = "all",
	    debug: bool = False,
	    sleep: int = 300,
	):
	    """Dispatch the selected scorer evaluations to ``run_eval_modal``.

	    Args:
	        weave_project: Forwarded unchanged to every evaluation call.
	        scorer: ``"all"`` to run every entry of ``EVALUATIONS_CONFIGS``,
	            or one of its keys to run only that entry.
	        debug: Forwarded unchanged; when true the sleep value sent to the
	            evaluations is 10 regardless of ``sleep``.
	        sleep: Sleep value sent to the evaluations when ``debug`` is false.

	    Raises:
	        KeyError: If ``scorer`` is neither ``"all"`` nor a key of
	            ``EVALUATIONS_CONFIGS``; the message lists the valid names.
	    """
	    if scorer == "all":
	        selected = list(EVALUATIONS_CONFIGS.values())
	    else:
	        try:
	            selected = [EVALUATIONS_CONFIGS[scorer]]
	        except KeyError:
	            # Replace the bare lookup failure with a message that tells the
	            # user which names are accepted.
	            known = ", ".join(sorted(map(str, EVALUATIONS_CONFIGS)))
	            raise KeyError(
	                f"Unknown scorer {scorer!r}; expected 'all' or one of: {known}"
	            ) from None

	    # The sleep value is the same for every call, so compute it once.
	    effective_sleep = 10 if debug else sleep
	    call_args = [
	        (cfg.model_cls, cfg.dataset_url, weave_project, debug, effective_sleep)
	        for cfg in selected
	    ]

	    # One call per argument tuple; print whatever each call returns.
	    for result in run_eval_modal.starmap(call_args):
	        print(result)