Skip to content

Instantly share code, notes, and snippets.

@av
Created October 6, 2024 16:30
Show Gist options
  • Save av/d0e54402b84a2a5efc71383d3abcbc9c to your computer and use it in GitHub Desktop.
Save av/d0e54402b84a2a5efc71383d3abcbc9c to your computer and use it in GitHub Desktop.
padbench
#!/bin/bash
#
# Benchmark driver: picks a task file, then configures and runs `h bench`.
#
# Usage: <this-script> [TASK]
#   TASK  Base name (without ".yaml") of a task file under ./scripts/bench/.
#         Defaults to mmlu_256_slim. Other task names referenced by this
#         script: padbench, bbh_256_slim.

# Abort on unset variables so that a missing value (for example an API key
# variable in one of the commented-out alternatives) is never silently
# expanded to an empty string.
set -u

# Task selection: first positional argument, falling back to the default.
TASK="${1:-mmlu_256_slim}"

# Common
# Point the benchmark at the selected task file. The path is quoted so a task
# name containing spaces or glob characters is passed as a single argument.
h bench tasks "./scripts/bench/${TASK}.yaml"
# NOTE(review): presumably the number of parallel benchmark workers; confirm
# against the `h` documentation.
h config set bench.parallel 4
h config set boost.intermediate_output false
# Judge
# Configures the judge used by `h bench`: prompt, model, API endpoint, key
# and token limit.
#
# Alternative (disabled): OpenRouter L3.1 70B
# h bench judge_prompt short
# h bench judge meta-llama/llama-3.1-70b-instruct
# h bench judge_api https://openrouter.ai/api
# h bench judge_key "$OPENROUTER_KEY"
#
# Active: small llama, reached through the URL reported by `h url -i ollama`.
h bench judge_prompt short
h bench judge llama3.1:8b-instruct-q6_K

# Resolve the endpoint into a variable first: an unquoted, unchecked
# substitution would collapse to *no argument at all* when the lookup fails,
# silently turning the setter call below into a different invocation.
judge_api_url="$(h url -i ollama)"
if [[ -z "${judge_api_url}" ]]; then
  printf 'error: "h url -i ollama" returned no URL; cannot configure judge API\n' >&2
  exit 1
fi
h bench judge_api "${judge_api_url}"

h bench judge_key sk-ollama
h bench judge_tokens 2048
# L3.1 Test (disabled single-line variant of the baseline configuration)
# h bench model llama3.1:8b-instruct-q6_K
# h bench variants --temperature 0 --max_tokens 2048 --seed 42
# h bench api "$(h url -i ollama)"

# Baseline
# Benchmarks the unmodified model with a fixed temperature, seed and token
# limit, using the URL reported by `h url -i ollama` as the API endpoint.
h bench model llama3.1:8b-instruct-q6_K
h bench variants \
  --temperature 0 \
  --model llama3.1:8b-instruct-q6_K \
  --max_tokens 2048 \
  --seed 42

# Resolve and validate the endpoint before handing it over; an empty result
# must stop the script instead of being dropped by word splitting.
baseline_api_url="$(h url -i ollama)"
if [[ -z "${baseline_api_url}" ]]; then
  printf 'error: "h url -i ollama" returned no URL; cannot configure bench API\n' >&2
  exit 1
fi
h bench api "${baseline_api_url}"
h bench key sk-ollama

# Run
# The run name embeds the task name; quoted so it always stays one argument.
h bench run --name "l3.1-${TASK}-baseline"
# Boost
# Benchmarks the prefixed model variants (cea-, 3t-, ambi-, clarity-, fml-,
# l33t-, pad-) of the same base model, using the URL reported by
# `h url -i boost` as the API endpoint.
h bench model cea-llama3.1:8b-instruct-q6_K
h bench variants \
  --temperature 0 \
  --model cea-llama3.1:8b-instruct-q6_K \
  --model 3t-llama3.1:8b-instruct-q6_K \
  --model ambi-llama3.1:8b-instruct-q6_K \
  --model clarity-llama3.1:8b-instruct-q6_K \
  --model fml-llama3.1:8b-instruct-q6_K \
  --model l33t-llama3.1:8b-instruct-q6_K \
  --model pad-llama3.1:8b-instruct-q6_K \
  --max_tokens 2048 \
  --seed 42 \
  --@boost_cea_generations 64 \
  --@boost_cea_rule 110
# Alternative values kept for reference (append to the command above, each
# preceded by a line continuation):
#   --@boost_cea_generations 8 | 16 | 32
#   --@boost_cea_rule 30 | 45 | 73 | 75

# Resolve and validate the endpoint; an empty URL is never usable, and left
# unquoted it would vanish from the argument list.
boost_api_url="$(h url -i boost)"
if [[ -z "${boost_api_url}" ]]; then
  printf 'error: "h url -i boost" returned no URL; cannot configure bench API\n' >&2
  exit 1
fi
h bench api "${boost_api_url}"

# Read the key from the `h` configuration. An empty key is not passed on, so
# the setter is never invoked with a missing argument; the bench key is then
# left as previously configured and a warning is printed.
boost_api_key="$(h config get boost.api.key)"
if [[ -n "${boost_api_key}" ]]; then
  h bench key "${boost_api_key}"
else
  printf 'warning: boost.api.key is empty; leaving the bench key unchanged\n' >&2
fi

# Run
# The run name embeds the task name; quoted so it always stays one argument.
h bench run --name "l3.1-${TASK}-cea"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment