av · September 12, 2024 21:13
diff --git a/cheese.yaml b/cheese.yaml
 # Cheese trivia tasks. Each list item carries its tags, the question text,
 # and the criteria (correctness, bonus) that an answer is checked against.
 # Sibling keys must line up under `tags`, and the criteria entries must be
 # nested one level deeper, or the file is not valid block-style YAML.
 - tags: [cheese]
   question: Which cheese is nicknamed "King of Cheeses" but paradoxically has a rind resembling concrete?
   criteria:
     correctness: Answer mentions Parmigiano-Reggiano
     bonus: Answer explains the paradox

 - tags: [cheese]
   question: What's the connection between a Norwegian brown cheese and caramel?
   criteria:
     correctness: Answer mentions caramelized milk sugars in any form
     bonus: Answer mentions Brunost or its alternative names

 - tags: [cheese]
   question: Which cheese's name translates to "re-cooked" and why?
   criteria:
     correctness: Answer mentions ricotta in any form
     bonus: Answer explains how ricotta is made

 - tags: [cheese]
   question: What unlikely ingredient gives Sage Derby its distinctive green veins?
   criteria:
     correctness: Answer mentions sage leaves (powdered or otherwise)
     bonus: Answer explains how Sage Derby is made
diff --git a/cheesebench.sh b/cheesebench.sh
 #!/bin/bash
 #
 # cheesebench.sh - configure and launch a benchmark run over the cheese tasks.
 #
 # Steps: set the judge model/API/key, the task file and the parallelism
 # through the `h` CLI, then point the bench at an Ollama endpoint, declare
 # the variant matrix and start a named run.
 # NOTE(review): `h` is presumably the Harbor CLI short alias - confirm it is
 # on PATH before running.
 #
 # Environment:
 #   OPENROUTER_KEY  (required) API key passed to `h bench judge_key`.
 #   TASKS           (optional) path to the tasks file; default below.
 #   NAME            (optional) suffix of the run name; default "cheese".

 # Stop at the first failing step so a run is never started on top of a
 # half-applied configuration.
 set -euo pipefail

 # The key is taken from the environment instead of being written into the
 # script. A bare `<your key>` placeholder is parsed by bash as redirections
 # and is a syntax error.
 OPENROUTER_KEY="${OPENROUTER_KEY:?export OPENROUTER_KEY=<your key> before running}"
 TASKS="${TASKS:-/path/to/cheese.yaml}"
 NAME="${NAME:-cheese}"

 # Common
 h bench judge meta-llama/llama-3.1-70b-instruct
 h bench judge_api https://openrouter.ai/api
 h bench judge_key "$OPENROUTER_KEY"
 h bench tasks "$TASKS"
 h config set bench.parallel 4

 # Ollama
 h bench model llama3.1:8b-instruct-q2_K
 h bench api http://harbor.ollama:11434

 # Variant matrix, one flag/value pair per line. The array expands to exactly
 # the same argument words as the former single-line invocation.
 variants=(
   --temperature 0
   --temperature 0.5
   --temperature 1.0
   --max_tokens 1024
   --model llama3.1:8b
   --model phi3:latest
   --model mistral:7b
   --model gemma2:latest
   --model mixtral:latest
   --model mistral-nemo:12b-instruct-2407-q8_0
   --model dolphin-mixtral:8x7b
   --model codestral
 )
 h bench variants "${variants[@]}"

 # NOTE(review): the run name mentions only llama3.1-8b-q2_K although the
 # variants above list several models - confirm the name is intended.
 h bench run --name "ollama-llama3.1-8b-q2_K-${NAME}"
	# Cheese trivia tasks. Each list item carries its tags, the question text,
	# and the criteria (correctness, bonus) that an answer is checked against.
	# The leading tab is the container indent of this copy; nesting after it is
	# expressed with spaces so that the structure stays readable.
	- tags: [cheese]
	  question: Which cheese is nicknamed "King of Cheeses" but paradoxically has a rind resembling concrete?
	  criteria:
	    correctness: Answer mentions Parmigiano-Reggiano
	    bonus: Answer explains the paradox

	- tags: [cheese]
	  question: What's the connection between a Norwegian brown cheese and caramel?
	  criteria:
	    correctness: Answer mentions caramelized milk sugars in any form
	    bonus: Answer mentions Brunost or its alternative names

	- tags: [cheese]
	  question: Which cheese's name translates to "re-cooked" and why?
	  criteria:
	    correctness: Answer mentions ricotta in any form
	    bonus: Answer explains how ricotta is made

	- tags: [cheese]
	  question: What unlikely ingredient gives Sage Derby its distinctive green veins?
	  criteria:
	    correctness: Answer mentions sage leaves (powdered or otherwise)
	    bonus: Answer explains how Sage Derby is made
	#!/bin/bash
	#
	# cheesebench.sh - configure and launch a benchmark run over the cheese tasks.
	#
	# Steps: set the judge model/API/key, the task file and the parallelism
	# through the `h` CLI, then point the bench at an Ollama endpoint, declare
	# the variant matrix and start a named run.
	# NOTE(review): `h` is presumably the Harbor CLI short alias - confirm it is
	# on PATH before running.
	#
	# Environment:
	#   OPENROUTER_KEY  (required) API key passed to `h bench judge_key`.
	#   TASKS           (optional) path to the tasks file; default below.
	#   NAME            (optional) suffix of the run name; default "cheese".

	# Stop at the first failing step so a run is never started on top of a
	# half-applied configuration.
	set -euo pipefail

	# The key is taken from the environment instead of being written into the
	# script. A bare `<your key>` placeholder is parsed by bash as redirections
	# and is a syntax error.
	OPENROUTER_KEY="${OPENROUTER_KEY:?export OPENROUTER_KEY=<your key> before running}"
	TASKS="${TASKS:-/path/to/cheese.yaml}"
	NAME="${NAME:-cheese}"

	# Common
	h bench judge meta-llama/llama-3.1-70b-instruct
	h bench judge_api https://openrouter.ai/api
	h bench judge_key "$OPENROUTER_KEY"
	h bench tasks "$TASKS"
	h config set bench.parallel 4

	# Ollama
	h bench model llama3.1:8b-instruct-q2_K
	h bench api http://harbor.ollama:11434

	# Variant matrix, one flag/value pair per line. The array expands to exactly
	# the same argument words as the former single-line invocation.
	variants=(
	  --temperature 0
	  --temperature 0.5
	  --temperature 1.0
	  --max_tokens 1024
	  --model llama3.1:8b
	  --model phi3:latest
	  --model mistral:7b
	  --model gemma2:latest
	  --model mixtral:latest
	  --model mistral-nemo:12b-instruct-2407-q8_0
	  --model dolphin-mixtral:8x7b
	  --model codestral
	)
	h bench variants "${variants[@]}"

	# NOTE(review): the run name mentions only llama3.1-8b-q2_K although the
	# variants above list several models - confirm the name is intended.
	h bench run --name "ollama-llama3.1-8b-q2_K-${NAME}"