Skip to content

Instantly share code, notes, and snippets.

View tcapelle's full-sized avatar
😄
happy

Thomas Capelle tcapelle

😄
happy
View GitHub Profile
@tcapelle
tcapelle / weave-eval-skill.md
Last active April 27, 2026 20:01
A Weave Eval Skill

name: weave-eval description: Convert any kind of evaluation code into a Weave Evaluation. Use whenever the user has scoring code, an evaluation loop, a model-vs-ground-truth comparison, a list of predictions with metrics, a pandas DataFrame / CSV / JSONL of results to log a posteriori, a per-row loop with a black-box model (remote API, third-party agent), or any "I'm checking how well my model/LLM/agent performs" workflow and wants it logged to Weave. Covers both the declarative (weave.Evaluation) and imperative (weave.EvaluationLogger) APIs and picks between them. Triggers on phrases like "convert this to a weave evaluation", "wrap this in weave.Evaluation", "log this as a weave eval", "use EvaluationLogger", "log this batch to weave", "track this in weave", or any code resembling an eval loop where the user asks for Weave integration. Also use proactively when the user mentions evaluating LLMs/models and is already using weave.init — the right answer is almost always one of the two APIs in this skill.

image: lmsysorg/sglang:latest
command:
- /bin/bash
- -c
- |
pip install --break-system-packages sglang==0.3.2.dev9039+pr-17247.g90c446848 --extra-index-url https://sgl-project.github.io/whl/pr/ && \
pip install --break-system-packages git+https://github.com/huggingface/transformers.git@76732b4e7120808ff989edbd16401f61fa6a0afa && \
python3 -m sglang.launch_server \
--model-path zai-org/GLM-4.7-Flash \
--trust-remote-code \
import torch
from transformers import AutoTokenizer
from trl import GRPOConfig, GRPOTrainer
import re
import wandb
import accelerate
from math_verify import parse, verify, ExprExtractionConfig
from datasets import load_dataset, Dataset
accelerator = accelerate.Accelerator()
@tcapelle
tcapelle / eval_with_modal.py
Created March 21, 2025 14:47
Eval using Modal
import modal
from modal import Image
from evals import EVALUATIONS_CONFIGS
from evals.runner import run_eval
GPU_TYPE = "L4"
# Create Modal app
app = modal.App(name="eval_scorers")
from dataclasses import dataclass
import simple_parsing as sp
from typing import Literal
import modal
from modal import Image
from evals import EVALUATIONS_CONFIGS
from evals.runner import run_eval
@tcapelle
tcapelle / eval_modal.py
Last active January 13, 2025 20:41
Evaluation on Modal
# put your wandb api key in the modal secrets
# >modal run eval_latency_modal.py
import time
import logging
import numpy as np
from rich.console import Console
from rich.table import Table
import modal
from modal import Image
@tcapelle
tcapelle / bs_blocker.py
Last active November 30, 2024 07:14
A Bluesky auto blocker with AI moderation
# pip install weave openai atproto rich
import os
import openai
import weave
from atproto import Client
from rich.console import Console
from rich.rule import Rule
from datetime import datetime, timedelta, timezone
import warnings
@tcapelle
tcapelle / call_llama90.py
Created October 15, 2024 15:39
Simple script to call Llama 90b
import os, openai, weave
MODEL = "meta-llama/Llama-3.2-90B-Vision-Instruct"
weave.init("llama32_90B_EU")
image_url = "https://www.hachette.fr/sites/default/files/webmasters/l023-1.png"
llama_client = openai.OpenAI(
base_url="http://195.242.25.198:8032/v1",
api_key=os.environ.get("WANDB_API_KEY")
@tcapelle
tcapelle / llama32.py
Last active September 27, 2024 17:00
import os, openai, weave
MODEL = "Llama-3.2-90B-Vision-Instruct"
weave.init("EU_HAS_LLAMA_90B")
image_url = "https://limaspanishhouse.com/wp-content/uploads/2021/02/peruvian-llama-2-1536x1346.jpg"
llama_client = openai.OpenAI(
base_url="http://195.242.25.198:8032/v1",
api_key=os.environ.get("WANDB_API_KEY")
import asyncio
import os
import base64
from pathlib import Path
from mistralai import Mistral
image_path = "test_24915.jpg"
temperature = 0.0
max_tokens = None