Created
September 25, 2024 19:51
-
-
Save av/8f8b00a06c6e1a583b20264826a5347f to your computer and use it in GitHub Desktop.
Small Llama 3.2 Benchmarks
This file has been truncated, but you can view the full file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html lang="en"> | |
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Harbor Bench</title>
  <!-- Charting library loaded from a CDN. -->
  <!-- NOTE(review): the URL carries no version, so the CDN decides which
       Chart.js release is served; consider pinning a version. -->
  <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
  <style>
    /* Page shell: a horizontally centered column capped at 1200px. */
    body {
      font-family: Arial, sans-serif;
      line-height: 1.6;
      color: #333;
      max-width: 1200px;
      margin: 0 auto;
      padding: 20px;
    }

    /* Shared heading colour. */
    h1, h2, h3 {
      color: #2c3e50;
    }

    /* Light panel used by the summary block. */
    .summary {
      background-color: #f8f9fa;
      border-radius: 5px;
      padding: 15px;
      margin-bottom: 20px;
    }

    /* Responsive grid: as many columns of at least 300px as fit the row. */
    .chart-grid {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
      gap: 20px;
      margin-bottom: 20px;
    }

    /* Fixed-height card. NOTE(review): no element in the static markup uses
       this class; presumably the page script creates them. Confirm. */
    .chart-container {
      background-color: #fff;
      border-radius: 5px;
      box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
      padding: 15px;
      height: 360px;
    }

    /* Full-width table with single (collapsed) cell borders. */
    table {
      width: 100%;
      border-collapse: collapse;
      margin-bottom: 20px;
    }

    th, td {
      border: 1px solid #ddd;
      padding: 8px;
      text-align: left;
    }

    th {
      background-color: #f2f2f2;
    }

    /* Zebra striping for even rows. */
    tr:nth-child(even) {
      background-color: #f9f9f9;
    }
  </style>
</head>
<body> | |
<h1>Bench</h1>
<!-- Summary panel. Each span has an id and no static text.
     NOTE(review): presumably the page script fills them in; confirm. -->
<div class="summary">
  <h2>Summary</h2>
  <p>Total tasks: <span id="totalTasks"></span></p>
  <p>Overall success rate: <span id="overallSuccessRate"></span>%</p>
  <p>Average task duration: <span id="averageTaskDuration"></span> ms</p>
  <p>Task duration range: <span id="minTaskDuration"></span> ms - <span id="maxTaskDuration"></span> ms</p>
</div>
<h2>Results</h2>
<!-- Empty grid container; it has no static children. -->
<div id="chartGrid" class="chart-grid"></div>
<h2>Detailed Results</h2>
<!-- Results table: static header row and an empty body.
     NOTE(review): rows are presumably appended by the page script; confirm. -->
<table id="resultsTable">
  <thead>
    <tr>
      <!-- scope="col" ties each header to its column for assistive technology. -->
      <th scope="col">Task ID</th>
      <th scope="col">Result</th>
      <th scope="col">Tags</th>
      <th scope="col">Duration (ms)</th>
      <th scope="col">LLM Model</th>
      <th scope="col">Judge Model</th>
    </tr>
  </thead>
  <tbody>
  </tbody>
</table>
<script> | |
const data = [{"id":"task.1.correctness","result":0,"tags":["bbh"],"time":8139,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.2.correctness","result":0,"tags":["bbh"],"time":7066,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.3.correctness","result":1,"tags":["bbh"],"time":7211,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.4.correctness","result":0,"tags":["bbh"],"time":7143,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.5.correctness","result":0,"tags":["bbh"],"time":2771,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42
,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.6.correctness","result":1,"tags":["bbh"],"time":152,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.7.correctness","result":0,"tags":["bbh"],"time":158,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.8.correctness","result":0,"tags":["bbh"],"time":169,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.9.correctness","result":0,"tags":["bbh"],"time":165,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.10.correctness","result":0,"tags":["bbh"],"time":216,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge
.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.11.correctness","result":1,"tags":["bbh"],"time":275,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.12.correctness","result":0,"tags":["bbh"],"time":241,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.13.correctness","result":0,"tags":["bbh"],"time":153,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.14.correctness","result":0,"tags":["bbh"],"time":215,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.15.correctness","result":1,"tags":["bbh"],"time":186,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/a
pi","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.16.correctness","result":0,"tags":["bbh"],"time":93,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.17.correctness","result":0,"tags":["bbh"],"time":183,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.18.correctness","result":0,"tags":["bbh"],"time":265,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19.correctness","result":1,"tags":["bbh"],"time":236,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.20.correctness","result":0,"tags":["bbh"],"time":259,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judg
e.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.21.correctness","result":1,"tags":["bbh"],"time":166,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.22.correctness","result":1,"tags":["bbh"],"time":155,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.23.correctness","result":0,"tags":["bbh"],"time":243,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.24.correctness","result":1,"tags":["bbh"],"time":363,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.25.correctness","result":0,"tags":["bbh"],"time":201,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-
llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.26.correctness","result":1,"tags":["bbh"],"time":228,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.27.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.28.correctness","result":1,"tags":["bbh"],"time":147,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.29.correctness","result":0,"tags":["bbh"],"time":2670,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.30.correctness","result":1,"tags":["bbh"],"time":195,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm
.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.31.correctness","result":0,"tags":["bbh"],"time":1430,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.32.correctness","result":0,"tags":["bbh"],"time":185,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.33.correctness","result":0,"tags":["bbh"],"time":176,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.34.correctness","result":1,"tags":["bbh"],"time":192,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.35.correctness","result":1,"tags":["bbh"],"time":592,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm
.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.36.correctness","result":0,"tags":["bbh"],"time":163,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.37.correctness","result":0,"tags":["bbh"],"time":159,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.38.correctness","result":0,"tags":["bbh"],"time":170,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.39.correctness","result":1,"tags":["bbh"],"time":171,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.40.correctness","result":0,"tags":["bbh"],"time":151,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl
":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.41.correctness","result":0,"tags":["bbh"],"time":1809,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.42.correctness","result":0,"tags":["bbh"],"time":248,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.43.correctness","result":0,"tags":["bbh"],"time":209,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.44.correctness","result":0,"tags":["bbh"],"time":169,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.45.correctness","result":1,"tags":["bbh"],"time":468,"llm.model":"ll
ama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.46.correctness","result":0,"tags":["bbh"],"time":144,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.47.correctness","result":0,"tags":["bbh"],"time":176,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.48.correctness","result":0,"tags":["bbh"],"time":237,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.49.correctness","result":0,"tags":["bbh"],"time":238,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.50.correctness","result":0,"tags"
:["bbh"],"time":181,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.51.correctness","result":1,"tags":["bbh"],"time":180,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.52.correctness","result":1,"tags":["bbh"],"time":214,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.53.correctness","result":0,"tags":["bbh"],"time":194,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.54.correctness","result":0,"tags":["bbh"],"time":372,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"tas
k.55.correctness","result":0,"tags":["bbh"],"time":1666,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.56.correctness","result":0,"tags":["bbh"],"time":220,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.57.correctness","result":1,"tags":["bbh"],"time":182,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.58.correctness","result":0,"tags":["bbh"],"time":183,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.59.correctness","result":0,"tags":["bbh"],"time":192,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:
12:22.560Z-l3.2-bbh_256"},{"id":"task.60.correctness","result":0,"tags":["bbh"],"time":338,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.61.correctness","result":0,"tags":["bbh"],"time":261,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.62.correctness","result":0,"tags":["bbh"],"time":190,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.63.correctness","result":0,"tags":["bbh"],"time":216,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.64.correctness","result":0,"tags":["bbh"],"time":136,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"ju
dge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.65.correctness","result":0,"tags":["bbh"],"time":927,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.66.correctness","result":1,"tags":["bbh"],"time":212,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.67.correctness","result":0,"tags":["bbh"],"time":173,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.68.correctness","result":1,"tags":["bbh"],"time":348,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.69.correctness","result":1,"tags":["bbh"],"time":159,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":
"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.70.correctness","result":0,"tags":["bbh"],"time":317,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.71.correctness","result":0,"tags":["bbh"],"time":168,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.72.correctness","result":1,"tags":["bbh"],"time":134,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.73.correctness","result":1,"tags":["bbh"],"time":152,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.74.correctness","result":0,"tags":["bbh"],"time":297,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https:/
/openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.75.correctness","result":1,"tags":["bbh"],"time":188,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.76.correctness","result":1,"tags":["bbh"],"time":139,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.77.correctness","result":1,"tags":["bbh"],"time":202,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.78.correctness","result":0,"tags":["bbh"],"time":131,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.79.correctness","result":0,"tags":["bbh"],"time":212,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70
b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.80.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.81.correctness","result":1,"tags":["bbh"],"time":150,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.82.correctness","result":1,"tags":["bbh"],"time":144,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.83.correctness","result":1,"tags":["bbh"],"time":134,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.84.correctness","result":0,"tags":["bbh"],"time":204,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"ju
dge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.85.correctness","result":0,"tags":["bbh"],"time":211,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.86.correctness","result":1,"tags":["bbh"],"time":219,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.87.correctness","result":1,"tags":["bbh"],"time":280,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.88.correctness","result":0,"tags":["bbh"],"time":292,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.89.correctness","result":0,"tags":["bbh"],"time":751,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"l
lm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.90.correctness","result":0,"tags":["bbh"],"time":160,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.91.correctness","result":0,"tags":["bbh"],"time":216,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.92.correctness","result":0,"tags":["bbh"],"time":164,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.93.correctness","result":0,"tags":["bbh"],"time":228,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.94.correctness","result":0,"tags":["bbh"],"time":174,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.oll
ama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.95.correctness","result":0,"tags":["bbh"],"time":190,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.96.correctness","result":1,"tags":["bbh"],"time":192,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.97.correctness","result":1,"tags":["bbh"],"time":168,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.98.correctness","result":1,"tags":["bbh"],"time":188,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.99.correctness","result":1,"tags":["bbh"],"time":179,"llm.model":"llama3.1:8b-instruct-q8
_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.100.correctness","result":0,"tags":["bbh"],"time":486,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.101.correctness","result":0,"tags":["bbh"],"time":178,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.102.correctness","result":0,"tags":["bbh"],"time":220,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.103.correctness","result":0,"tags":["bbh"],"time":199,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.104.correctness","result":0,"tags":["bbh"],"time":
133,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.105.correctness","result":0,"tags":["bbh"],"time":982,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.106.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.107.correctness","result":0,"tags":["bbh"],"time":249,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.108.correctness","result":1,"tags":["bbh"],"time":180,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.109.correc
tness","result":1,"tags":["bbh"],"time":182,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.110.correctness","result":1,"tags":["bbh"],"time":377,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.111.correctness","result":1,"tags":["bbh"],"time":187,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.112.correctness","result":1,"tags":["bbh"],"time":188,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.113.correctness","result":1,"tags":["bbh"],"time":181,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.56
0Z-l3.2-bbh_256"},{"id":"task.114.correctness","result":0,"tags":["bbh"],"time":180,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.115.correctness","result":0,"tags":["bbh"],"time":198,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.116.correctness","result":0,"tags":["bbh"],"time":158,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.117.correctness","result":0,"tags":["bbh"],"time":204,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.118.correctness","result":0,"tags":["bbh"],"time":222,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge
.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.119.correctness","result":1,"tags":["bbh"],"time":240,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.120.correctness","result":1,"tags":["bbh"],"time":363,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.121.correctness","result":1,"tags":["bbh"],"time":249,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.122.correctness","result":1,"tags":["bbh"],"time":209,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.123.correctness","result":1,"tags":["bbh"],"time":1326,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.promp
t":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.124.correctness","result":1,"tags":["bbh"],"time":373,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.125.correctness","result":1,"tags":["bbh"],"time":241,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.126.correctness","result":1,"tags":["bbh"],"time":333,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.127.correctness","result":0,"tags":["bbh"],"time":186,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.128.correctness","result":1,"tags":["bbh"],"time":185,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":
"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.129.correctness","result":1,"tags":["bbh"],"time":218,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.130.correctness","result":1,"tags":["bbh"],"time":195,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.131.correctness","result":1,"tags":["bbh"],"time":196,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.132.correctness","result":0,"tags":["bbh"],"time":178,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.133.correctness","result":1,"tags":["bbh"],"time":182,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama
/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.134.correctness","result":0,"tags":["bbh"],"time":183,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.135.correctness","result":1,"tags":["bbh"],"time":161,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.136.correctness","result":0,"tags":["bbh"],"time":196,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.137.correctness","result":1,"tags":["bbh"],"time":194,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.138.correctness","result":0,"tags":["bbh"],"time":455,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.
temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.139.correctness","result":1,"tags":["bbh"],"time":165,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.140.correctness","result":1,"tags":["bbh"],"time":187,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.141.correctness","result":1,"tags":["bbh"],"time":209,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.142.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.143.correctness","result":1,"tags":["bbh"],"time":1408,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434",
"llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.144.correctness","result":1,"tags":["bbh"],"time":152,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.145.correctness","result":1,"tags":["bbh"],"time":126,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.146.correctness","result":0,"tags":["bbh"],"time":188,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.147.correctness","result":1,"tags":["bbh"],"time":160,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.148.correctness","result":0,"tags":["bbh"],"time":236,"llm.model":"llama3.1:8b-instruct-q8_0","l
lm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.149.correctness","result":1,"tags":["bbh"],"time":218,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.150.correctness","result":0,"tags":["bbh"],"time":160,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.151.correctness","result":0,"tags":["bbh"],"time":154,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.152.correctness","result":1,"tags":["bbh"],"time":175,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.153.correctness","result":0,"tags":["bbh"],"time":401,"l
lm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.154.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.155.correctness","result":1,"tags":["bbh"],"time":225,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.156.correctness","result":0,"tags":["bbh"],"time":172,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.157.correctness","result":1,"tags":["bbh"],"time":224,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.158.correctness"
,"result":0,"tags":["bbh"],"time":244,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.159.correctness","result":1,"tags":["bbh"],"time":207,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.160.correctness","result":1,"tags":["bbh"],"time":170,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.161.correctness","result":0,"tags":["bbh"],"time":168,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.162.correctness","result":0,"tags":["bbh"],"time":537,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.
2-bbh_256"},{"id":"task.163.correctness","result":0,"tags":["bbh"],"time":158,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.164.correctness","result":0,"tags":["bbh"],"time":212,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.165.correctness","result":0,"tags":["bbh"],"time":237,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.166.correctness","result":0,"tags":["bbh"],"time":144,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.167.correctness","result":0,"tags":["bbh"],"time":216,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed"
:42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.168.correctness","result":0,"tags":["bbh"],"time":127,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.169.correctness","result":0,"tags":["bbh"],"time":239,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.170.correctness","result":0,"tags":["bbh"],"time":178,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.171.correctness","result":1,"tags":["bbh"],"time":158,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.172.correctness","result":1,"tags":["bbh"],"time":208,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"def
ault","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.173.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.174.correctness","result":0,"tags":["bbh"],"time":195,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.175.correctness","result":0,"tags":["bbh"],"time":331,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.176.correctness","result":0,"tags":["bbh"],"time":235,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.177.correctness","result":1,"tags":["bbh"],"time":413,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https:
//openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.178.correctness","result":1,"tags":["bbh"],"time":808,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.179.correctness","result":1,"tags":["bbh"],"time":159,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.180.correctness","result":1,"tags":["bbh"],"time":153,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.181.correctness","result":1,"tags":["bbh"],"time":214,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.182.correctness","result":1,"tags":["bbh"],"time":213,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-
3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.183.correctness","result":0,"tags":["bbh"],"time":1256,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.184.correctness","result":0,"tags":["bbh"],"time":174,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.185.correctness","result":0,"tags":["bbh"],"time":194,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.186.correctness","result":0,"tags":["bbh"],"time":382,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.187.correctness","result":1,"tags":["bbh"],"time":149,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temper
ature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.188.correctness","result":1,"tags":["bbh"],"time":212,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.189.correctness","result":0,"tags":["bbh"],"time":255,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.190.correctness","result":0,"tags":["bbh"],"time":131,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.191.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.192.correctness","result":0,"tags":["bbh"],"time":159,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.ma
x_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.193.correctness","result":0,"tags":["bbh"],"time":163,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.194.correctness","result":1,"tags":["bbh"],"time":210,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.195.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.196.correctness","result":1,"tags":["bbh"],"time":359,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.197.correctness","result":0,"tags":["bbh"],"time":163,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiU
rl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.198.correctness","result":0,"tags":["bbh"],"time":272,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.199.correctness","result":0,"tags":["bbh"],"time":149,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.200.correctness","result":0,"tags":["bbh"],"time":192,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.201.correctness","result":0,"tags":["bbh"],"time":224,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.202.correctness","result":0,"tags":["bbh"],"time":224,"llm.mode
l":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.203.correctness","result":0,"tags":["bbh"],"time":225,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.204.correctness","result":0,"tags":["bbh"],"time":206,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.205.correctness","result":1,"tags":["bbh"],"time":207,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.206.correctness","result":0,"tags":["bbh"],"time":181,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.207.correctness","resul
t":1,"tags":["bbh"],"time":225,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.208.correctness","result":0,"tags":["bbh"],"time":311,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.209.correctness","result":0,"tags":["bbh"],"time":254,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.210.correctness","result":1,"tags":["bbh"],"time":170,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.211.correctness","result":0,"tags":["bbh"],"time":170,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_2
56"},{"id":"task.212.correctness","result":1,"tags":["bbh"],"time":148,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.213.correctness","result":1,"tags":["bbh"],"time":362,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.214.correctness","result":1,"tags":["bbh"],"time":203,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.215.correctness","result":0,"tags":["bbh"],"time":247,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.216.correctness","result":1,"tags":["bbh"],"time":361,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"na
me":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.217.correctness","result":1,"tags":["bbh"],"time":232,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.218.correctness","result":0,"tags":["bbh"],"time":223,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.219.correctness","result":0,"tags":["bbh"],"time":185,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.220.correctness","result":0,"tags":["bbh"],"time":165,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.221.correctness","result":1,"tags":["bbh"],"time":181,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","
judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.222.correctness","result":1,"tags":["bbh"],"time":196,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.223.correctness","result":0,"tags":["bbh"],"time":196,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.224.correctness","result":1,"tags":["bbh"],"time":177,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.225.correctness","result":1,"tags":["bbh"],"time":192,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.226.correctness","result":1,"tags":["bbh"],"time":158,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openr
outer.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.227.correctness","result":0,"tags":["bbh"],"time":159,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.228.correctness","result":0,"tags":["bbh"],"time":1528,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.229.correctness","result":0,"tags":["bbh"],"time":1769,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.230.correctness","result":1,"tags":["bbh"],"time":187,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.231.correctness","result":1,"tags":["bbh"],"time":234,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-7
0b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.232.correctness","result":0,"tags":["bbh"],"time":203,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.233.correctness","result":0,"tags":["bbh"],"time":1209,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.234.correctness","result":0,"tags":["bbh"],"time":171,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.235.correctness","result":0,"tags":["bbh"],"time":172,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.236.correctness","result":1,"tags":["bbh"],"time":166,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature
":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.237.correctness","result":0,"tags":["bbh"],"time":169,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.238.correctness","result":0,"tags":["bbh"],"time":207,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.239.correctness","result":1,"tags":["bbh"],"time":207,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.240.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.241.correctness","result":0,"tags":["bbh"],"time":286,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tok
ens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.242.correctness","result":0,"tags":["bbh"],"time":247,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.243.correctness","result":1,"tags":["bbh"],"time":140,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.244.correctness","result":0,"tags":["bbh"],"time":1610,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.245.correctness","result":1,"tags":["bbh"],"time":152,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.246.correctness","result":0,"tags":["bbh"],"time":209,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":
"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.247.correctness","result":0,"tags":["bbh"],"time":301,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.248.correctness","result":1,"tags":["bbh"],"time":168,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.249.correctness","result":1,"tags":["bbh"],"time":231,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.250.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.251.correctness","result":1,"tags":["bbh"],"time":158,"llm.model":"
llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.252.correctness","result":1,"tags":["bbh"],"time":161,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.253.correctness","result":1,"tags":["bbh"],"time":198,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.254.correctness","result":1,"tags":["bbh"],"time":225,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.255.correctness","result":0,"tags":["bbh"],"time":188,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.256.correctness","result":1
,"tags":["bbh"],"time":186,"llm.model":"llama3.1:8b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.1.correctness","result":0,"tags":["bbh"],"time":2683,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.2.correctness","result":0,"tags":["bbh"],"time":1043,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.3.correctness","result":0,"tags":["bbh"],"time":1155,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.4.correctness","result":0,"tags":["bbh"],"time":1252,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"i
d":"task.5.correctness","result":0,"tags":["bbh"],"time":839,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.6.correctness","result":1,"tags":["bbh"],"time":191,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.7.correctness","result":0,"tags":["bbh"],"time":425,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.8.correctness","result":0,"tags":["bbh"],"time":559,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.9.correctness","result":0,"tags":["bbh"],"time":384,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19
:12:22.560Z-l3.2-bbh_256"},{"id":"task.10.correctness","result":0,"tags":["bbh"],"time":578,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.11.correctness","result":1,"tags":["bbh"],"time":584,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.12.correctness","result":0,"tags":["bbh"],"time":278,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.13.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.14.correctness","result":0,"tags":["bbh"],"time":639,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"j
udge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.15.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.16.correctness","result":0,"tags":["bbh"],"time":250,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.17.correctness","result":0,"tags":["bbh"],"time":205,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.18.correctness","result":0,"tags":["bbh"],"time":660,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19.correctness","result":0,"tags":["bbh"],"time":225,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":
"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.20.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.21.correctness","result":1,"tags":["bbh"],"time":203,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.22.correctness","result":1,"tags":["bbh"],"time":154,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.23.correctness","result":0,"tags":["bbh"],"time":205,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.24.correctness","result":0,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https:/
/openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.25.correctness","result":0,"tags":["bbh"],"time":547,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.26.correctness","result":1,"tags":["bbh"],"time":437,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.27.correctness","result":0,"tags":["bbh"],"time":299,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.28.correctness","result":0,"tags":["bbh"],"time":596,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.29.correctness","result":0,"tags":["bbh"],"time":123,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70
b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.30.correctness","result":1,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.31.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.32.correctness","result":0,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.33.correctness","result":0,"tags":["bbh"],"time":431,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.34.correctness","result":0,"tags":["bbh"],"time":359,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"ju
dge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.35.correctness","result":0,"tags":["bbh"],"time":284,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.36.correctness","result":0,"tags":["bbh"],"time":239,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.37.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.38.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.39.correctness","result":0,"tags":["bbh"],"time":252,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"l
lm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.40.correctness","result":0,"tags":["bbh"],"time":401,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.41.correctness","result":1,"tags":["bbh"],"time":77,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.42.correctness","result":0,"tags":["bbh"],"time":513,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.43.correctness","result":0,"tags":["bbh"],"time":664,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.44.correctness","result":0,"tags":["bbh"],"time":363,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.olla
ma:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.45.correctness","result":1,"tags":["bbh"],"time":154,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.46.correctness","result":0,"tags":["bbh"],"time":292,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.47.correctness","result":1,"tags":["bbh"],"time":572,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.48.correctness","result":0,"tags":["bbh"],"time":524,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.49.correctness","result":0,"tags":["bbh"],"time":917,"llm.model":"llama3.2:1b-instruct-q2_
K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.50.correctness","result":0,"tags":["bbh"],"time":860,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.51.correctness","result":0,"tags":["bbh"],"time":437,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.52.correctness","result":1,"tags":["bbh"],"time":578,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.53.correctness","result":1,"tags":["bbh"],"time":1511,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.54.correctness","result":0,"tags":["bbh"],"time":140,"
llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.55.correctness","result":0,"tags":["bbh"],"time":222,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.56.correctness","result":0,"tags":["bbh"],"time":463,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.57.correctness","result":1,"tags":["bbh"],"time":221,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.58.correctness","result":1,"tags":["bbh"],"time":159,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.59.correctness","re
sult":1,"tags":["bbh"],"time":685,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.60.correctness","result":0,"tags":["bbh"],"time":485,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.61.correctness","result":0,"tags":["bbh"],"time":153,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.62.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.63.correctness","result":0,"tags":["bbh"],"time":527,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_25
6"},{"id":"task.64.correctness","result":0,"tags":["bbh"],"time":608,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.65.correctness","result":0,"tags":["bbh"],"time":408,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.66.correctness","result":0,"tags":["bbh"],"time":295,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.67.correctness","result":1,"tags":["bbh"],"time":440,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.68.correctness","result":0,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2
024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.69.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.70.correctness","result":0,"tags":["bbh"],"time":506,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.71.correctness","result":0,"tags":["bbh"],"time":453,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.72.correctness","result":0,"tags":["bbh"],"time":491,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.73.correctness","result":0,"tags":["bbh"],"time":470,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.tempe
rature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.74.correctness","result":0,"tags":["bbh"],"time":340,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.75.correctness","result":1,"tags":["bbh"],"time":205,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.76.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.77.correctness","result":0,"tags":["bbh"],"time":380,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.78.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","j
udge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.79.correctness","result":0,"tags":["bbh"],"time":249,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.80.correctness","result":0,"tags":["bbh"],"time":523,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.81.correctness","result":0,"tags":["bbh"],"time":599,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.82.correctness","result":0,"tags":["bbh"],"time":189,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.83.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.api
Url":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.84.correctness","result":0,"tags":["bbh"],"time":225,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.85.correctness","result":1,"tags":["bbh"],"time":227,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.86.correctness","result":1,"tags":["bbh"],"time":431,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.87.correctness","result":0,"tags":["bbh"],"time":533,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.88.correctness","result":0,"tags":["bbh"],"time":368,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama
/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.89.correctness","result":0,"tags":["bbh"],"time":683,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.90.correctness","result":0,"tags":["bbh"],"time":284,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.91.correctness","result":0,"tags":["bbh"],"time":197,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.92.correctness","result":1,"tags":["bbh"],"time":866,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.93.correctness","result":0,"tags":["bbh"],"time":179,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.tempe
rature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.94.correctness","result":1,"tags":["bbh"],"time":163,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.95.correctness","result":0,"tags":["bbh"],"time":470,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.96.correctness","result":0,"tags":["bbh"],"time":337,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.97.correctness","result":0,"tags":["bbh"],"time":145,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.98.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_to
kens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.99.correctness","result":1,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.100.correctness","result":0,"tags":["bbh"],"time":6426,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.101.correctness","result":0,"tags":["bbh"],"time":201,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.102.correctness","result":1,"tags":["bbh"],"time":210,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.103.correctness","result":0,"tags":["bbh"],"time":6553,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl"
:"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.104.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.105.correctness","result":0,"tags":["bbh"],"time":492,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.106.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.107.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.108.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":
"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.109.correctness","result":1,"tags":["bbh"],"time":316,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.110.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.111.correctness","result":0,"tags":["bbh"],"time":478,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.112.correctness","result":1,"tags":["bbh"],"time":248,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.113.correctness","result":
0,"tags":["bbh"],"time":177,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.114.correctness","result":0,"tags":["bbh"],"time":306,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.115.correctness","result":0,"tags":["bbh"],"time":2721,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.116.correctness","result":0,"tags":["bbh"],"time":620,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.117.correctness","result":0,"tags":["bbh"],"time":376,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256
"},{"id":"task.118.correctness","result":1,"tags":["bbh"],"time":1472,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.119.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.120.correctness","result":0,"tags":["bbh"],"time":494,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.121.correctness","result":0,"tags":["bbh"],"time":411,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.122.correctness","result":0,"tags":["bbh"],"time":321,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"nam
e":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.123.correctness","result":0,"tags":["bbh"],"time":327,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.124.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.125.correctness","result":1,"tags":["bbh"],"time":857,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.126.correctness","result":0,"tags":["bbh"],"time":1773,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.127.correctness","result":0,"tags":["bbh"],"time":1961,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default",
"judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.128.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.129.correctness","result":0,"tags":["bbh"],"time":226,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.130.correctness","result":0,"tags":["bbh"],"time":493,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.131.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.132.correctness","result":0,"tags":["bbh"],"time":429,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://open
router.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.133.correctness","result":1,"tags":["bbh"],"time":448,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.134.correctness","result":0,"tags":["bbh"],"time":521,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.135.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.136.correctness","result":0,"tags":["bbh"],"time":528,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.137.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70
b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.138.correctness","result":0,"tags":["bbh"],"time":753,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.139.correctness","result":0,"tags":["bbh"],"time":600,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.140.correctness","result":1,"tags":["bbh"],"time":525,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.141.correctness","result":1,"tags":["bbh"],"time":541,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.142.correctness","result":1,"tags":["bbh"],"time":496,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":
0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.143.correctness","result":1,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.144.correctness","result":1,"tags":["bbh"],"time":153,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.145.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.146.correctness","result":1,"tags":["bbh"],"time":292,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.147.correctness","result":0,"tags":["bbh"],"time":449,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens
":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.148.correctness","result":0,"tags":["bbh"],"time":268,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.149.correctness","result":1,"tags":["bbh"],"time":153,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.150.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.151.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.152.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http:
//harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.153.correctness","result":0,"tags":["bbh"],"time":1133,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.154.correctness","result":0,"tags":["bbh"],"time":164,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.155.correctness","result":1,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.156.correctness","result":1,"tags":["bbh"],"time":505,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.157.correctness","result":0,"tags":["bbh"],"time":498,"llm.model":"llama
3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.158.correctness","result":0,"tags":["bbh"],"time":1398,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.159.correctness","result":0,"tags":["bbh"],"time":455,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.160.correctness","result":1,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.161.correctness","result":0,"tags":["bbh"],"time":472,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.162.correctness","result":0,"ta
gs":["bbh"],"time":414,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.163.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.164.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.165.correctness","result":0,"tags":["bbh"],"time":481,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.166.correctness","result":0,"tags":["bbh"],"time":398,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id
":"task.167.correctness","result":1,"tags":["bbh"],"time":441,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.168.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.169.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.170.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.171.correctness","result":0,"tags":["bbh"],"time":384,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024
-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.172.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.173.correctness","result":1,"tags":["bbh"],"time":493,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.174.correctness","result":0,"tags":["bbh"],"time":564,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.175.correctness","result":0,"tags":["bbh"],"time":408,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.176.correctness","result":0,"tags":["bbh"],"time":177,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.tem
perature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.177.correctness","result":0,"tags":["bbh"],"time":127,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.178.correctness","result":1,"tags":["bbh"],"time":270,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.179.correctness","result":0,"tags":["bbh"],"time":826,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.180.correctness","result":0,"tags":["bbh"],"time":746,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.181.correctness","result":0,"tags":["bbh"],"time":238,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/
api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.182.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.183.correctness","result":0,"tags":["bbh"],"time":504,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.184.correctness","result":0,"tags":["bbh"],"time":572,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.185.correctness","result":1,"tags":["bbh"],"time":6338,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.186.correctness","result":0,"tags":["bbh"],"time":1977,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instru
ct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.187.correctness","result":0,"tags":["bbh"],"time":734,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.188.correctness","result":0,"tags":["bbh"],"time":210,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.189.correctness","result":0,"tags":["bbh"],"time":230,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.190.correctness","result":0,"tags":["bbh"],"time":361,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.191.correctness","result":1,"tags":["bbh"],"time":945,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge
.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.192.correctness","result":1,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.193.correctness","result":0,"tags":["bbh"],"time":470,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.194.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.195.correctness","result":1,"tags":["bbh"],"time":246,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.196.correctness","result":1,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,
"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.197.correctness","result":0,"tags":["bbh"],"time":305,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.198.correctness","result":0,"tags":["bbh"],"time":6465,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.199.correctness","result":0,"tags":["bbh"],"time":280,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.200.correctness","result":0,"tags":["bbh"],"time":222,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.201.correctness","result":1,"tags":["bbh"],"time":280,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://ha
rbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.202.correctness","result":1,"tags":["bbh"],"time":367,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.203.correctness","result":0,"tags":["bbh"],"time":1190,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.204.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.205.correctness","result":1,"tags":["bbh"],"time":241,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.206.correctness","result":0,"tags":["bbh"],"time":182,"llm.model":"llama3.2:
1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.207.correctness","result":1,"tags":["bbh"],"time":370,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.208.correctness","result":0,"tags":["bbh"],"time":236,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.209.correctness","result":0,"tags":["bbh"],"time":250,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.210.correctness","result":0,"tags":["bbh"],"time":1052,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.211.correctness","result":0,"tags":
["bbh"],"time":690,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.212.correctness","result":0,"tags":["bbh"],"time":243,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.213.correctness","result":0,"tags":["bbh"],"time":75,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.214.correctness","result":0,"tags":["bbh"],"time":548,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.215.correctness","result":1,"tags":["bbh"],"time":218,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"t
ask.216.correctness","result":1,"tags":["bbh"],"time":468,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.217.correctness","result":0,"tags":["bbh"],"time":122,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.218.correctness","result":1,"tags":["bbh"],"time":511,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.219.correctness","result":0,"tags":["bbh"],"time":851,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.220.correctness","result":0,"tags":["bbh"],"time":284,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-
25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.221.correctness","result":0,"tags":["bbh"],"time":545,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.222.correctness","result":0,"tags":["bbh"],"time":209,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.223.correctness","result":0,"tags":["bbh"],"time":314,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.224.correctness","result":1,"tags":["bbh"],"time":901,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.225.correctness","result":0,"tags":["bbh"],"time":620,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.tempera
ture":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.226.correctness","result":0,"tags":["bbh"],"time":178,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.227.correctness","result":0,"tags":["bbh"],"time":567,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.228.correctness","result":0,"tags":["bbh"],"time":1303,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.229.correctness","result":0,"tags":["bbh"],"time":502,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.230.correctness","result":1,"tags":["bbh"],"time":160,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api
","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.231.correctness","result":1,"tags":["bbh"],"time":254,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.232.correctness","result":1,"tags":["bbh"],"time":851,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.233.correctness","result":0,"tags":["bbh"],"time":699,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.234.correctness","result":0,"tags":["bbh"],"time":675,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.235.correctness","result":0,"tags":["bbh"],"time":255,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","
judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.236.correctness","result":1,"tags":["bbh"],"time":575,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.237.correctness","result":0,"tags":["bbh"],"time":464,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.238.correctness","result":0,"tags":["bbh"],"time":204,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.239.correctness","result":0,"tags":["bbh"],"time":373,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.240.correctness","result":0,"tags":["bbh"],"time":545,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.mode
l":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.241.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.242.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.243.correctness","result":0,"tags":["bbh"],"time":351,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.244.correctness","result":0,"tags":["bbh"],"time":616,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.245.correctness","result":0,"tags":["bbh"],"time":872,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.
seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.246.correctness","result":0,"tags":["bbh"],"time":1290,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.247.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.248.correctness","result":1,"tags":["bbh"],"time":176,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.249.correctness","result":0,"tags":["bbh"],"time":372,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.250.correctness","result":0,"tags":["bbh"],"time":777,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.o
llama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.251.correctness","result":0,"tags":["bbh"],"time":319,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.252.correctness","result":1,"tags":["bbh"],"time":167,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.253.correctness","result":0,"tags":["bbh"],"time":233,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.254.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.255.correctness","result":0,"tags":["bbh"],"time":233,"llm.model":"llama3.2:1b-inst
ruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.256.correctness","result":1,"tags":["bbh"],"time":239,"llm.model":"llama3.2:1b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.1.correctness","result":0,"tags":["bbh"],"time":1687,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.2.correctness","result":0,"tags":["bbh"],"time":1559,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.3.correctness","result":1,"tags":["bbh"],"time":1641,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.4.correctness","result":0,"tags":["bbh"],"time
":1552,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.5.correctness","result":0,"tags":["bbh"],"time":213,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.6.correctness","result":1,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.7.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.8.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.9.correctness",
"result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.10.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.11.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.12.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.13.correctness","result":0,"tags":["bbh"],"time":127,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_
256"},{"id":"task.14.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.15.correctness","result":0,"tags":["bbh"],"time":145,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.16.correctness","result":0,"tags":["bbh"],"time":198,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.17.correctness","result":0,"tags":["bbh"],"time":145,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.18.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":
"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.20.correctness","result":0,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.21.correctness","result":1,"tags":["bbh"],"time":144,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.22.correctness","result":0,"tags":["bbh"],"time":167,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.23.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temp
erature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.24.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.25.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.26.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.27.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.28.correctness","result":1,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","
judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.29.correctness","result":0,"tags":["bbh"],"time":315,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.30.correctness","result":1,"tags":["bbh"],"time":60,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.31.correctness","result":0,"tags":["bbh"],"time":317,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.32.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.33.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.api
Url":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.34.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.35.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.36.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.37.correctness","result":1,"tags":["bbh"],"time":81,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.38.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/l
lama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.39.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.40.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.41.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.42.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.43.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperat
ure":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.44.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.45.correctness","result":1,"tags":["bbh"],"time":159,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.46.correctness","result":0,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.47.correctness","result":0,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.48.correctness","result":0,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens
":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.49.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.50.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.51.correctness","result":1,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.52.correctness","result":1,"tags":["bbh"],"time":83,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.53.correctness","result":1,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://har
bor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.54.correctness","result":0,"tags":["bbh"],"time":86,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.55.correctness","result":0,"tags":["bbh"],"time":190,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.56.correctness","result":1,"tags":["bbh"],"time":169,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.57.correctness","result":1,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.58.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instr
uct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.59.correctness","result":0,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.60.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.61.correctness","result":1,"tags":["bbh"],"time":144,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.62.correctness","result":1,"tags":["bbh"],"time":123,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.63.correctness","result":0,"tags":["bbh"],"time":
101,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.64.correctness","result":0,"tags":["bbh"],"time":293,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.65.correctness","result":0,"tags":["bbh"],"time":396,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.66.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.67.correctness","result":0,"tags":["bbh"],"time":175,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.68.correctness
","result":0,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.69.correctness","result":0,"tags":["bbh"],"time":65,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.70.correctness","result":0,"tags":["bbh"],"time":446,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.71.correctness","result":0,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.72.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh
_256"},{"id":"task.73.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.74.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.75.correctness","result":1,"tags":["bbh"],"time":77,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.76.correctness","result":1,"tags":["bbh"],"time":235,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.77.correctness","result":0,"tags":["bbh"],"time":79,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2
024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.78.correctness","result":0,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.79.correctness","result":1,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.80.correctness","result":0,"tags":["bbh"],"time":78,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.81.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.82.correctness","result":0,"tags":["bbh"],"time":82,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperat
ure":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.83.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.84.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.85.correctness","result":0,"tags":["bbh"],"time":158,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.86.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.87.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judg
e.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.88.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.89.correctness","result":0,"tags":["bbh"],"time":228,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.90.correctness","result":1,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.91.correctness","result":0,"tags":["bbh"],"time":127,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.92.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl"
:"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.93.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.94.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.95.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.96.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.97.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llam
a-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.98.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.99.correctness","result":1,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.100.correctness","result":0,"tags":["bbh"],"time":228,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.101.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.102.correctness","result":0,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperatu
re":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.103.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.104.correctness","result":0,"tags":["bbh"],"time":120,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.105.correctness","result":0,"tags":["bbh"],"time":925,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.106.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.107.correctness","result":0,"tags":["bbh"],"time":78,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_to
kens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.108.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.109.correctness","result":1,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.110.correctness","result":1,"tags":["bbh"],"time":157,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.111.correctness","result":0,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.112.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"h
ttp://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.113.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.114.correctness","result":1,"tags":["bbh"],"time":60,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.115.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.116.correctness","result":0,"tags":["bbh"],"time":127,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.117.correctness","result":0,"tags":["bbh"],"time":49,"llm.model":"llam
a3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.118.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.119.correctness","result":0,"tags":["bbh"],"time":212,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.120.correctness","result":0,"tags":["bbh"],"time":170,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.121.correctness","result":1,"tags":["bbh"],"time":86,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.122.correctness","result":1,"tag
s":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.123.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.124.correctness","result":1,"tags":["bbh"],"time":131,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.125.correctness","result":1,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.126.correctness","result":0,"tags":["bbh"],"time":83,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id"
:"task.127.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.128.correctness","result":1,"tags":["bbh"],"time":174,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.129.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.130.correctness","result":1,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.131.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-0
9-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.132.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.133.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.134.correctness","result":0,"tags":["bbh"],"time":169,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.135.correctness","result":1,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.136.correctness","result":1,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temper
ature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.137.correctness","result":1,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.138.correctness","result":0,"tags":["bbh"],"time":260,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.139.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.140.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.141.correctness","result":1,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api"
,"judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.142.correctness","result":1,"tags":["bbh"],"time":172,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.143.correctness","result":0,"tags":["bbh"],"time":73,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.144.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.145.correctness","result":1,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.146.correctness","result":0,"tags":["bbh"],"time":69,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","jud
ge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.147.correctness","result":1,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.148.correctness","result":1,"tags":["bbh"],"time":168,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.149.correctness","result":0,"tags":["bbh"],"time":63,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.150.correctness","result":0,"tags":["bbh"],"time":74,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.151.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"me
ta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.152.correctness","result":1,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.153.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.154.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.155.correctness","result":0,"tags":["bbh"],"time":127,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.156.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":
42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.157.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.158.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.159.correctness","result":0,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.160.correctness","result":1,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.161.correctness","result":1,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:
11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.162.correctness","result":0,"tags":["bbh"],"time":184,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.163.correctness","result":0,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.164.correctness","result":1,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.165.correctness","result":0,"tags":["bbh"],"time":58,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.166.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q4_
0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.167.correctness","result":0,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.168.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.169.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.170.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.171.correctness","result":0,"tags":["bbh"],"time":84
,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.172.correctness","result":0,"tags":["bbh"],"time":208,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.173.correctness","result":1,"tags":["bbh"],"time":149,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.174.correctness","result":0,"tags":["bbh"],"time":207,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.175.correctness","result":1,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.176.correctne
ss","result":1,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.177.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.178.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.179.correctness","result":1,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.180.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-
l3.2-bbh_256"},{"id":"task.181.correctness","result":0,"tags":["bbh"],"time":171,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.182.correctness","result":0,"tags":["bbh"],"time":164,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.183.correctness","result":0,"tags":["bbh"],"time":337,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.184.correctness","result":0,"tags":["bbh"],"time":120,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.185.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.se
ed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.186.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.187.correctness","result":1,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.188.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.189.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.190.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"d
efault","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.191.correctness","result":1,"tags":["bbh"],"time":91,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.192.correctness","result":0,"tags":["bbh"],"time":87,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.193.correctness","result":0,"tags":["bbh"],"time":68,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.194.correctness","result":1,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.195.correctness","result":0,"tags":["bbh"],"time":429,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https:/
/openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.196.correctness","result":0,"tags":["bbh"],"time":159,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.197.correctness","result":0,"tags":["bbh"],"time":155,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.198.correctness","result":0,"tags":["bbh"],"time":417,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.199.correctness","result":1,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.200.correctness","result":0,"tags":["bbh"],"time":86,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.
1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.201.correctness","result":0,"tags":["bbh"],"time":263,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.202.correctness","result":1,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.203.correctness","result":0,"tags":["bbh"],"time":59,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.204.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.205.correctness","result":0,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperatur
e":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.206.correctness","result":1,"tags":["bbh"],"time":84,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.207.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.208.correctness","result":1,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.209.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.210.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_toke
ns":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.211.correctness","result":0,"tags":["bbh"],"time":148,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.212.correctness","result":1,"tags":["bbh"],"time":84,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.213.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.214.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.215.correctness","result":1,"tags":["bbh"],"time":133,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"ht
tp://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.216.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.217.correctness","result":0,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.218.correctness","result":1,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.219.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.220.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"lla
ma3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.221.correctness","result":1,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.222.correctness","result":1,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.223.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.224.correctness","result":1,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.225.correctness","result":1,"tag
s":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.226.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.227.correctness","result":0,"tags":["bbh"],"time":56,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.228.correctness","result":0,"tags":["bbh"],"time":306,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.229.correctness","result":0,"tags":["bbh"],"time":235,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":
"task.230.correctness","result":1,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.231.correctness","result":1,"tags":["bbh"],"time":178,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.232.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.233.correctness","result":0,"tags":["bbh"],"time":257,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.234.correctness","result":0,"tags":["bbh"],"time":83,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-
25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.235.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.236.correctness","result":0,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.237.correctness","result":1,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.238.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.239.correctness","result":0,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperatu
re":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.240.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.241.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.242.correctness","result":0,"tags":["bbh"],"time":179,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.243.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.244.correctness","result":0,"tags":["bbh"],"time":3474,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api",
"judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.245.correctness","result":0,"tags":["bbh"],"time":155,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.246.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.247.correctness","result":0,"tags":["bbh"],"time":242,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.248.correctness","result":1,"tags":["bbh"],"time":258,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.249.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","ju
dge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.250.correctness","result":1,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.251.correctness","result":1,"tags":["bbh"],"time":63,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.252.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.253.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.254.correctness","result":0,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"
meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.255.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.256.correctness","result":1,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.1.correctness","result":0,"tags":["bbh"],"time":1895,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.2.correctness","result":1,"tags":["bbh"],"time":1488,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.3.correctness","result":1,"tags":["bbh"],"time":1489,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.s
eed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.4.correctness","result":0,"tags":["bbh"],"time":1489,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.5.correctness","result":0,"tags":["bbh"],"time":223,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.6.correctness","result":1,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.7.correctness","result":0,"tags":["bbh"],"time":181,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.8.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ol
lama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.9.correctness","result":0,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.10.correctness","result":0,"tags":["bbh"],"time":65,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.11.correctness","result":1,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.12.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.13.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instr
uct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.14.correctness","result":0,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.15.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.16.correctness","result":0,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.17.correctness","result":0,"tags":["bbh"],"time":84,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.18.correctness","result":1,"tags":["bbh"]
,"time":93,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19.correctness","result":0,"tags":["bbh"],"time":70,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.20.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.21.correctness","result":1,"tags":["bbh"],"time":133,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.22.correctness","result":1,"tags":["bbh"],"time":230,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"tas
k.23.correctness","result":0,"tags":["bbh"],"time":182,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.24.correctness","result":0,"tags":["bbh"],"time":188,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.25.correctness","result":0,"tags":["bbh"],"time":176,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.26.correctness","result":0,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.27.correctness","result":1,"tags":["bbh"],"time":91,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09
-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.28.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.29.correctness","result":0,"tags":["bbh"],"time":578,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.30.correctness","result":1,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.31.correctness","result":0,"tags":["bbh"],"time":424,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.32.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.t
emperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.33.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.34.correctness","result":0,"tags":["bbh"],"time":80,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.35.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.36.correctness","result":1,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.37.correctness","result":0,"tags":["bbh"],"time":74,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.
ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.38.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.39.correctness","result":1,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.40.correctness","result":0,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.41.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.42.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-i
nstruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.43.correctness","result":0,"tags":["bbh"],"time":78,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.44.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.45.correctness","result":1,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.46.correctness","result":0,"tags":["bbh"],"time":71,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.47.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":
0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.48.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.49.correctness","result":0,"tags":["bbh"],"time":78,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.50.correctness","result":0,"tags":["bbh"],"time":61,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.51.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.52.correctness","result":1,"tags":["bbh"],"time":75,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tok
ens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.53.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.54.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.55.correctness","result":0,"tags":["bbh"],"time":157,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.56.correctness","result":1,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.57.correctness","result":1,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUr
l":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.58.correctness","result":1,"tags":["bbh"],"time":144,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.59.correctness","result":0,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.60.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.61.correctness","result":0,"tags":["bbh"],"time":168,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.62.correctness","result":0,"tags":["bbh"],"time":131,"llm.mod
el":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.63.correctness","result":0,"tags":["bbh"],"time":56,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.64.correctness","result":1,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.65.correctness","result":0,"tags":["bbh"],"time":355,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.66.correctness","result":0,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.67.correctness","
result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.68.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.69.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.70.correctness","result":0,"tags":["bbh"],"time":194,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.71.correctness","result":0,"tags":["bbh"],"time":120,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z
-l3.2-bbh_256"},{"id":"task.72.correctness","result":0,"tags":["bbh"],"time":73,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.73.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.74.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.75.correctness","result":1,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.76.correctness","result":0,"tags":["bbh"],"time":235,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judg
e.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.77.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.78.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.79.correctness","result":1,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.80.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.81.correctness","result":0,"tags":["bbh"],"time":75,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.p
rompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.82.correctness","result":1,"tags":["bbh"],"time":58,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.83.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.84.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.85.correctness","result":0,"tags":["bbh"],"time":191,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.86.correctness","result":0,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.a
piUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.87.correctness","result":1,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.88.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.89.correctness","result":0,"tags":["bbh"],"time":196,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.90.correctness","result":1,"tags":["bbh"],"time":76,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.91.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":
"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.92.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.93.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.94.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.95.correctness","result":0,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.96.correctness","result":1,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm
.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.97.correctness","result":0,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.98.correctness","result":1,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.99.correctness","result":1,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.100.correctness","result":0,"tags":["bbh"],"time":382,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.101.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://
harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.102.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.103.correctness","result":0,"tags":["bbh"],"time":213,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.104.correctness","result":0,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.105.correctness","result":0,"tags":["bbh"],"time":189,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.106.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"lla
ma3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.107.correctness","result":1,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.108.correctness","result":0,"tags":["bbh"],"time":81,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.109.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.110.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.111.correctness","res
ult":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.112.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.113.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.114.correctness","result":0,"tags":["bbh"],"time":123,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.115.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z
-l3.2-bbh_256"},{"id":"task.116.correctness","result":0,"tags":["bbh"],"time":179,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.117.correctness","result":1,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.118.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.119.correctness","result":0,"tags":["bbh"],"time":201,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.120.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0
,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.121.correctness","result":1,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.122.correctness","result":1,"tags":["bbh"],"time":179,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.123.correctness","result":1,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.124.correctness","result":1,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.125.correctness","result":1,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/ap
i","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.126.correctness","result":1,"tags":["bbh"],"time":169,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.127.correctness","result":0,"tags":["bbh"],"time":182,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.128.correctness","result":1,"tags":["bbh"],"time":153,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.129.correctness","result":1,"tags":["bbh"],"time":212,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.130.correctness","result":1,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-
instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.131.correctness","result":0,"tags":["bbh"],"time":205,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.132.correctness","result":0,"tags":["bbh"],"time":188,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.133.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.134.correctness","result":1,"tags":["bbh"],"time":174,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.135.correctness","result":1,"tags":["bbh"],"time":68,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temper
ature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.136.correctness","result":0,"tags":["bbh"],"time":178,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.137.correctness","result":1,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.138.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.139.correctness","result":1,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.140.correctness","result":0,"tags":["bbh"],"time":120,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434
","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.141.correctness","result":1,"tags":["bbh"],"time":69,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.142.correctness","result":1,"tags":["bbh"],"time":156,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.143.correctness","result":0,"tags":["bbh"],"time":67,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.144.correctness","result":0,"tags":["bbh"],"time":123,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.145.correctness","result":1,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-
q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.146.correctness","result":0,"tags":["bbh"],"time":145,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.147.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.148.correctness","result":1,"tags":["bbh"],"time":182,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.149.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.150.correctness","result":0,"tags":["b
bh"],"time":105,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.151.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.152.correctness","result":1,"tags":["bbh"],"time":77,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.153.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.154.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{
"id":"task.155.correctness","result":0,"tags":["bbh"],"time":172,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.156.correctness","result":0,"tags":["bbh"],"time":153,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.157.correctness","result":0,"tags":["bbh"],"time":166,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.158.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.159.correctness","result":1,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42
,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.160.correctness","result":1,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.161.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.162.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.163.correctness","result":0,"tags":["bbh"],"time":179,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.164.correctness","result":1,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.promp
t":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.165.correctness","result":0,"tags":["bbh"],"time":419,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.166.correctness","result":0,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.167.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.168.correctness","result":0,"tags":["bbh"],"time":161,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.169.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.
apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.170.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.171.correctness","result":0,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.172.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.173.correctness","result":1,"tags":["bbh"],"time":150,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.174.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.mo
del":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.175.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.176.correctness","result":1,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.177.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.178.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.179.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":
1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.180.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.181.correctness","result":0,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.182.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.183.correctness","result":0,"tags":["bbh"],"time":399,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.184.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUr
l":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.185.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.186.correctness","result":0,"tags":["bbh"],"time":213,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.187.correctness","result":1,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.188.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.189.correctness","result":0,"tags":["bbh"],"time":95,"ll
m.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.190.correctness","result":0,"tags":["bbh"],"time":173,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.191.correctness","result":1,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.192.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.193.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.194.corr
ectness","result":1,"tags":["bbh"],"time":145,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.195.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.196.correctness","result":0,"tags":["bbh"],"time":169,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.197.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.198.correctness","result":0,"tags":["bbh"],"time":161,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25
T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.199.correctness","result":1,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.200.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.201.correctness","result":0,"tags":["bbh"],"time":334,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.202.correctness","result":1,"tags":["bbh"],"time":81,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.203.correctness","result":0,"tags":["bbh"],"time":58,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.t
emperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.204.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.205.correctness","result":1,"tags":["bbh"],"time":165,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.206.correctness","result":1,"tags":["bbh"],"time":144,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.207.correctness","result":0,"tags":["bbh"],"time":83,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.208.correctness","result":1,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://ope
nrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.209.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.210.correctness","result":0,"tags":["bbh"],"time":131,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.211.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.212.correctness","result":1,"tags":["bbh"],"time":168,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.213.correctness","result":0,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/ll
ama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.214.correctness","result":0,"tags":["bbh"],"time":174,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.215.correctness","result":1,"tags":["bbh"],"time":194,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.216.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.217.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.218.correctness","result":0,"tags":["bbh"],"time":78,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42
,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.219.correctness","result":0,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.220.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.221.correctness","result":1,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.222.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.223.correctness","result":1,"tags":["bbh"],"time":79,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.o
llama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.224.correctness","result":1,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.225.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.226.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.227.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.228.correctness","result":0,"tags":["bbh"],"time":274,"llm.model":"llama3.2:1
b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.229.correctness","result":0,"tags":["bbh"],"time":187,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.230.correctness","result":1,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.231.correctness","result":1,"tags":["bbh"],"time":71,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.232.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.233.correctness","result":0,
"tags":["bbh"],"time":213,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.234.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.235.correctness","result":1,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.236.correctness","result":0,"tags":["bbh"],"time":57,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.237.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bb
h_256"},{"id":"task.238.correctness","result":0,"tags":["bbh"],"time":79,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.239.correctness","result":0,"tags":["bbh"],"time":144,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.240.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.241.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.242.correctness","result":0,"tags":["bbh"],"time":51,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.s
eed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.243.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.244.correctness","result":0,"tags":["bbh"],"time":3486,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.245.correctness","result":1,"tags":["bbh"],"time":61,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.246.correctness","result":0,"tags":["bbh"],"time":148,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.247.correctness","result":0,"tags":["bbh"],"time":86,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge
.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.248.correctness","result":0,"tags":["bbh"],"time":185,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.249.correctness","result":1,"tags":["bbh"],"time":68,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.250.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.251.correctness","result":1,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.252.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct",
"judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.253.correctness","result":0,"tags":["bbh"],"time":79,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.254.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.255.correctness","result":1,"tags":["bbh"],"time":144,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.256.correctness","result":1,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-q4_K_M","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.1.correctness","result":0,"tags":["bbh"],"time":1928,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"jud
ge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.2.correctness","result":0,"tags":["bbh"],"time":1513,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.3.correctness","result":1,"tags":["bbh"],"time":1537,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.4.correctness","result":0,"tags":["bbh"],"time":1478,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.5.correctness","result":0,"tags":["bbh"],"time":327,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.6.correctness","result":1,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.
seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.7.correctness","result":0,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.8.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.9.correctness","result":0,"tags":["bbh"],"time":122,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.10.correctness","result":0,"tags":["bbh"],"time":87,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.11.correctness","result":1,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:114
34","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.12.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.13.correctness","result":0,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.14.correctness","result":0,"tags":["bbh"],"time":87,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.15.correctness","result":0,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.16.correctness","result":0,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-q6_K","llm.a
piUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.17.correctness","result":1,"tags":["bbh"],"time":58,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.18.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19.correctness","result":0,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.20.correctness","result":0,"tags":["bbh"],"time":171,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.21.correctness","result":1,"tags":["bbh"],"time":105,"llm.model":"
llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.22.correctness","result":0,"tags":["bbh"],"time":176,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.23.correctness","result":0,"tags":["bbh"],"time":86,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.24.correctness","result":0,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.25.correctness","result":0,"tags":["bbh"],"time":148,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.26.correctness","result":0,"tags
":["bbh"],"time":154,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.27.correctness","result":0,"tags":["bbh"],"time":77,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.28.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.29.correctness","result":0,"tags":["bbh"],"time":527,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.30.correctness","result":1,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"tas
k.31.correctness","result":0,"tags":["bbh"],"time":439,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.32.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.33.correctness","result":0,"tags":["bbh"],"time":157,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.34.correctness","result":0,"tags":["bbh"],"time":87,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.35.correctness","result":1,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:
22.560Z-l3.2-bbh_256"},{"id":"task.36.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.37.correctness","result":1,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.38.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.39.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.40.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.
seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.41.correctness","result":0,"tags":["bbh"],"time":120,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.42.correctness","result":0,"tags":["bbh"],"time":164,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.43.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.44.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.45.correctness","result":1,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"def
ault","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.46.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.47.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.48.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.49.correctness","result":0,"tags":["bbh"],"time":162,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.50.correctness","result":0,"tags":["bbh"],"time":166,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://open
router.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.51.correctness","result":0,"tags":["bbh"],"time":232,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.52.correctness","result":1,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.53.correctness","result":1,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.54.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.55.correctness","result":0,"tags":["bbh"],"time":172,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-ins
truct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.56.correctness","result":1,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.57.correctness","result":1,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.58.correctness","result":1,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.59.correctness","result":0,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.60.correctness","result":0,"tags":["bbh"],"time":157,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.mo
del":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.61.correctness","result":1,"tags":["bbh"],"time":153,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.62.correctness","result":1,"tags":["bbh"],"time":204,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.63.correctness","result":0,"tags":["bbh"],"time":123,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.64.correctness","result":1,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.65.correctness","result":0,"tags":["bbh"],"time":374,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.see
d":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.66.correctness","result":0,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.67.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.68.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.69.correctness","result":0,"tags":["bbh"],"time":154,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.70.correctness","result":0,"tags":["bbh"],"time":155,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:114
34","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.71.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.72.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.73.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.74.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.75.correctness","result":0,"tags":["bbh"],"time":144,"llm.model":"llama3.2:1b-instruct-q6_K","ll
m.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.76.correctness","result":0,"tags":["bbh"],"time":171,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.77.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.78.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.79.correctness","result":1,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.80.correctness","result":0,"tags":["bbh"],"time":83,"llm.mode
l":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.81.correctness","result":0,"tags":["bbh"],"time":73,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.82.correctness","result":1,"tags":["bbh"],"time":79,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.83.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.84.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.85.correctness","result":0,"t
ags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.86.correctness","result":0,"tags":["bbh"],"time":170,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.87.correctness","result":1,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.88.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.89.correctness","result":0,"tags":["bbh"],"time":261,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":
"task.90.correctness","result":0,"tags":["bbh"],"time":166,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.91.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.92.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.93.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.94.correctness","result":0,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T
19:12:22.560Z-l3.2-bbh_256"},{"id":"task.95.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.96.correctness","result":1,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.97.correctness","result":1,"tags":["bbh"],"time":94,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.98.correctness","result":1,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.99.correctness","result":1,"tags":["bbh"],"time":160,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"
judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.100.correctness","result":0,"tags":["bbh"],"time":390,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.101.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.102.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.103.correctness","result":0,"tags":["bbh"],"time":180,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.104.correctness","result":0,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.pr
ompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.105.correctness","result":0,"tags":["bbh"],"time":228,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.106.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.107.correctness","result":1,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.108.correctness","result":0,"tags":["bbh"],"time":73,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.109.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl"
:"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.110.correctness","result":0,"tags":["bbh"],"time":158,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.111.correctness","result":0,"tags":["bbh"],"time":219,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.112.correctness","result":1,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.113.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.114.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llam
a/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.115.correctness","result":0,"tags":["bbh"],"time":168,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.116.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.117.correctness","result":0,"tags":["bbh"],"time":244,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.118.correctness","result":1,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.119.correctness","result":1,"tags":["bbh"],"time":149,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm
.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.120.correctness","result":0,"tags":["bbh"],"time":83,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.121.correctness","result":1,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.122.correctness","result":1,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.123.correctness","result":1,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.124.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","ll
m.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.125.correctness","result":1,"tags":["bbh"],"time":182,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.126.correctness","result":0,"tags":["bbh"],"time":162,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.127.correctness","result":0,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.128.correctness","result":1,"tags":["bbh"],"time":183,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.129.correctness","result":1,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-q6_K","llm.
apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.130.correctness","result":1,"tags":["bbh"],"time":122,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.131.correctness","result":0,"tags":["bbh"],"time":180,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.132.correctness","result":0,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.133.correctness","result":0,"tags":["bbh"],"time":69,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.134.correctness","result":0,"tags":["bbh"],"time":131,"llm.m
odel":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.135.correctness","result":1,"tags":["bbh"],"time":75,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.136.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.137.correctness","result":1,"tags":["bbh"],"time":154,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.138.correctness","result":0,"tags":["bbh"],"time":162,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.139.correctness","res
ult":1,"tags":["bbh"],"time":148,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.140.correctness","result":0,"tags":["bbh"],"time":194,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.141.correctness","result":1,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.142.correctness","result":1,"tags":["bbh"],"time":167,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.143.correctness","result":0,"tags":["bbh"],"time":149,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh
_256"},{"id":"task.144.correctness","result":0,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.145.correctness","result":1,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.146.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.147.correctness","result":0,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.148.correctness","result":0,"tags":["bbh"],"time":144,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"nam
e":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.149.correctness","result":0,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.150.correctness","result":1,"tags":["bbh"],"time":77,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.151.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.152.correctness","result":1,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.153.correctness","result":0,"tags":["bbh"],"time":225,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","jud
ge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.154.correctness","result":0,"tags":["bbh"],"time":87,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.155.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.156.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.157.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.158.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openroute
r.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.159.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.160.correctness","result":1,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.161.correctness","result":0,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.162.correctness","result":0,"tags":["bbh"],"time":122,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.163.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-ins
truct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.164.correctness","result":1,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.165.correctness","result":1,"tags":["bbh"],"time":366,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.166.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.167.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.168.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"ju
dge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.169.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.170.correctness","result":0,"tags":["bbh"],"time":71,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.171.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.172.correctness","result":0,"tags":["bbh"],"time":83,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.173.correctness","result":1,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024
,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.174.correctness","result":0,"tags":["bbh"],"time":75,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.175.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.176.correctness","result":1,"tags":["bbh"],"time":70,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.177.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.178.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harb
or.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.179.correctness","result":0,"tags":["bbh"],"time":159,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.180.correctness","result":0,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.181.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.182.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.183.correctness","result":0,"tags":["bbh"],"time":281,"llm.model":"llama3.2:1b-i
nstruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.184.correctness","result":0,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.185.correctness","result":0,"tags":["bbh"],"time":178,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.186.correctness","result":0,"tags":["bbh"],"time":159,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.187.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.188.correctness","result":0,"tags":["bbh"
],"time":135,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.189.correctness","result":0,"tags":["bbh"],"time":76,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.190.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.191.correctness","result":1,"tags":["bbh"],"time":150,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.192.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19
3.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.194.correctness","result":1,"tags":["bbh"],"time":199,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.195.correctness","result":0,"tags":["bbh"],"time":206,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.196.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.197.correctness","result":0,"tags":["bbh"],"time":165,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:
12:22.560Z-l3.2-bbh_256"},{"id":"task.198.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.199.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.200.correctness","result":0,"tags":["bbh"],"time":148,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.201.correctness","result":0,"tags":["bbh"],"time":331,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.202.correctness","result":1,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":
0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.203.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.204.correctness","result":0,"tags":["bbh"],"time":75,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.205.correctness","result":1,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.206.correctness","result":1,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.207.correctness","result":0,"tags":["bbh"],"time":78,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.
prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.208.correctness","result":1,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.209.correctness","result":0,"tags":["bbh"],"time":80,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.210.correctness","result":0,"tags":["bbh"],"time":61,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.211.correctness","result":0,"tags":["bbh"],"time":83,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.212.correctness","result":1,"tags":["bbh"],"time":131,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl
":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.213.correctness","result":1,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.214.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.215.correctness","result":1,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.216.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.217.correctness","result":1,"tags":["bbh"],"time":268,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-lla
ma/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.218.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.219.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.220.correctness","result":0,"tags":["bbh"],"time":150,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.221.correctness","result":1,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.222.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"ll
m.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.223.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.224.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.225.correctness","result":1,"tags":["bbh"],"time":161,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.226.correctness","result":0,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.227.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434"
,"llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.228.correctness","result":1,"tags":["bbh"],"time":5044,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.229.correctness","result":0,"tags":["bbh"],"time":290,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.230.correctness","result":1,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.231.correctness","result":1,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.232.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q6_K",
"llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.233.correctness","result":0,"tags":["bbh"],"time":524,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.234.correctness","result":0,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.235.correctness","result":0,"tags":["bbh"],"time":465,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.236.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.237.correctness","result":0,"tags":["bbh"],"time":68,"l
lm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.238.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.239.correctness","result":0,"tags":["bbh"],"time":89,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.240.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.241.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.242.correctness","
result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.243.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.244.correctness","result":0,"tags":["bbh"],"time":4729,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.245.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.246.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2
-bbh_256"},{"id":"task.247.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.248.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.249.correctness","result":1,"tags":["bbh"],"time":80,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.250.correctness","result":0,"tags":["bbh"],"time":76,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.251.correctness","result":1,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,
"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.252.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.253.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.254.correctness","result":0,"tags":["bbh"],"time":72,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.255.correctness","result":1,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.256.correctness","result":1,"tags":["bbh"],"time":166,"llm.model":"llama3.2:1b-instruct-q6_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","
judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.1.correctness","result":0,"tags":["bbh"],"time":1836,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.2.correctness","result":1,"tags":["bbh"],"time":1462,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.3.correctness","result":0,"tags":["bbh"],"time":1719,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.4.correctness","result":0,"tags":["bbh"],"time":1462,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.5.correctness","result":0,"tags":["bbh"],"time":268,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.
ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.6.correctness","result":1,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.7.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.8.correctness","result":0,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.9.correctness","result":0,"tags":["bbh"],"time":71,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.10.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge
.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.11.correctness","result":1,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.12.correctness","result":0,"tags":["bbh"],"time":154,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.13.correctness","result":0,"tags":["bbh"],"time":90,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.14.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.15.correctness","result":0,"tags":["bbh"],"time":59,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llam
a/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.16.correctness","result":1,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.17.correctness","result":1,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.18.correctness","result":1,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19.correctness","result":0,"tags":["bbh"],"time":173,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.20.correctness","result":0,"tags":["bbh"],"time":84,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.tempe
rature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.21.correctness","result":1,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.22.correctness","result":0,"tags":["bbh"],"time":160,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.23.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.24.correctness","result":0,"tags":["bbh"],"time":120,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.25.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_toke
ns":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.26.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.27.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.28.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.29.correctness","result":0,"tags":["bbh"],"time":808,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.30.correctness","result":1,"tags":["bbh"],"time":127,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://
harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.31.correctness","result":0,"tags":["bbh"],"time":458,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.32.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.33.correctness","result":0,"tags":["bbh"],"time":158,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.34.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.35.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-ins
truct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.36.correctness","result":0,"tags":["bbh"],"time":91,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.37.correctness","result":1,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.38.correctness","result":0,"tags":["bbh"],"time":204,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.39.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.40.correctness","result":0,"tags":["bbh"],"time"
:98,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.41.correctness","result":0,"tags":["bbh"],"time":81,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.42.correctness","result":0,"tags":["bbh"],"time":91,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.43.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.44.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.45.correctness","
result":1,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.46.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.47.correctness","result":0,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.48.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.49.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_25
6"},{"id":"task.50.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.51.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.52.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.53.correctness","result":1,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.54.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"20
24-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.55.correctness","result":0,"tags":["bbh"],"time":226,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.56.correctness","result":1,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.57.correctness","result":1,"tags":["bbh"],"time":159,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.58.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.59.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temper
ature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.60.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.61.correctness","result":1,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.62.correctness","result":1,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.63.correctness","result":0,"tags":["bbh"],"time":188,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.64.correctness","result":1,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","ju
dge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.65.correctness","result":0,"tags":["bbh"],"time":349,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.66.correctness","result":0,"tags":["bbh"],"time":145,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.67.correctness","result":0,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.68.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.69.correctness","result":0,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiU
rl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.70.correctness","result":0,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.71.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.72.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.73.correctness","result":0,"tags":["bbh"],"time":68,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.74.correctness","result":0,"tags":["bbh"],"time":218,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/l
lama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.75.correctness","result":1,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.76.correctness","result":0,"tags":["bbh"],"time":77,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.77.correctness","result":0,"tags":["bbh"],"time":159,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.78.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.79.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperat
ure":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.80.correctness","result":0,"tags":["bbh"],"time":65,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.81.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.82.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.83.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.84.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens"
:1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.85.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.86.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.87.correctness","result":1,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.88.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.89.correctness","result":0,"tags":["bbh"],"time":201,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://har
bor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.90.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.91.correctness","result":0,"tags":["bbh"],"time":87,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.92.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.93.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.94.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instru
ct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.95.correctness","result":0,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.96.correctness","result":1,"tags":["bbh"],"time":122,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.97.correctness","result":1,"tags":["bbh"],"time":60,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.98.correctness","result":1,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.99.correctness","result":1,"tags":["bbh"],"time":12
2,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.100.correctness","result":0,"tags":["bbh"],"time":463,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.101.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.102.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.103.correctness","result":0,"tags":["bbh"],"time":6137,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.104.correct
ness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.105.correctness","result":0,"tags":["bbh"],"time":312,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.106.correctness","result":0,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.107.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.108.correctness","result":0,"tags":["bbh"],"time":81,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-
l3.2-bbh_256"},{"id":"task.109.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.110.correctness","result":1,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.111.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.112.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.113.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.see
d":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.114.correctness","result":0,"tags":["bbh"],"time":57,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.115.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.116.correctness","result":0,"tags":["bbh"],"time":81,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.117.correctness","result":1,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.118.correctness","result":1,"tags":["bbh"],"time":78,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"defa
ult","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.119.correctness","result":0,"tags":["bbh"],"time":177,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.120.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.121.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.122.correctness","result":1,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.123.correctness","result":1,"tags":["bbh"],"time":89,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://
openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.124.correctness","result":1,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.125.correctness","result":1,"tags":["bbh"],"time":165,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.126.correctness","result":0,"tags":["bbh"],"time":161,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.127.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.128.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.
1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.129.correctness","result":1,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.130.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.131.correctness","result":0,"tags":["bbh"],"time":61,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.132.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.133.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature"
:0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.134.correctness","result":0,"tags":["bbh"],"time":159,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.135.correctness","result":1,"tags":["bbh"],"time":80,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.136.correctness","result":1,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.137.correctness","result":1,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.138.correctness","result":0,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_token
s":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.139.correctness","result":1,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.140.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.141.correctness","result":1,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.142.correctness","result":1,"tags":["bbh"],"time":94,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.143.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http:
//harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.144.correctness","result":0,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.145.correctness","result":1,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.146.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.147.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.148.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3
.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.149.correctness","result":0,"tags":["bbh"],"time":194,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.150.correctness","result":0,"tags":["bbh"],"time":65,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.151.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.152.correctness","result":1,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.153.correctness","result":0,"tags"
:["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.154.correctness","result":0,"tags":["bbh"],"time":91,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.155.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.156.correctness","result":0,"tags":["bbh"],"time":84,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.157.correctness","result":0,"tags":["bbh"],"time":181,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"t
ask.158.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.159.correctness","result":1,"tags":["bbh"],"time":162,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.160.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.161.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.162.correctness","result":0,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-2
5T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.163.correctness","result":0,"tags":["bbh"],"time":71,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.164.correctness","result":1,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.165.correctness","result":1,"tags":["bbh"],"time":422,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.166.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.167.correctness","result":0,"tags":["bbh"],"time":63,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature
":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.168.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.169.correctness","result":0,"tags":["bbh"],"time":80,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.170.correctness","result":0,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.171.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.172.correctness","result":0,"tags":["bbh"],"time":64,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge
.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.173.correctness","result":1,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.174.correctness","result":0,"tags":["bbh"],"time":122,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.175.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.176.correctness","result":1,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.177.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.api
Url":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.178.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.179.correctness","result":0,"tags":["bbh"],"time":84,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.180.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.181.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.182.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-ll
ama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.183.correctness","result":0,"tags":["bbh"],"time":348,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.184.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.185.correctness","result":0,"tags":["bbh"],"time":165,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.186.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.187.correctness","result":1,"tags":["bbh"],"time":82,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"ll
m.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.188.correctness","result":0,"tags":["bbh"],"time":70,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.189.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.190.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.191.correctness","result":1,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.192.correctness","result":0,"tags":["bbh"],"time":57,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","ll
m.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.193.correctness","result":0,"tags":["bbh"],"time":81,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.194.correctness","result":1,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.195.correctness","result":0,"tags":["bbh"],"time":361,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.196.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.197.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-q8_0","llm.a
piUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.198.correctness","result":0,"tags":["bbh"],"time":154,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.199.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.200.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.201.correctness","result":0,"tags":["bbh"],"time":291,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.202.correctness","result":1,"tags":["bbh"],"time":102,"llm.m
odel":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.203.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.204.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.205.correctness","result":1,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.206.correctness","result":1,"tags":["bbh"],"time":127,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.207.correctness","res
ult":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.208.correctness","result":1,"tags":["bbh"],"time":154,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.209.correctness","result":0,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.210.correctness","result":0,"tags":["bbh"],"time":172,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.211.correctness","result":0,"tags":["bbh"],"time":120,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_
256"},{"id":"task.212.correctness","result":1,"tags":["bbh"],"time":123,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.213.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.214.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.215.correctness","result":1,"tags":["bbh"],"time":148,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.216.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"na
me":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.217.correctness","result":0,"tags":["bbh"],"time":248,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.218.correctness","result":1,"tags":["bbh"],"time":80,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.219.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.220.correctness","result":0,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.221.correctness","result":1,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","ju
dge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.222.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.223.correctness","result":1,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.224.correctness","result":1,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.225.correctness","result":1,"tags":["bbh"],"time":60,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.226.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrout
er.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.227.correctness","result":0,"tags":["bbh"],"time":76,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.228.correctness","result":0,"tags":["bbh"],"time":537,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.229.correctness","result":0,"tags":["bbh"],"time":364,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.230.correctness","result":1,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.231.correctness","result":1,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-inst
ruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.232.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.233.correctness","result":0,"tags":["bbh"],"time":218,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.234.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.235.correctness","result":1,"tags":["bbh"],"time":89,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.236.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judg
e.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.237.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.238.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.239.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.240.correctness","result":0,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.241.correctness","result":0,"tags":["bbh"],"time":74,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"
llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.242.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.243.correctness","result":0,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.244.correctness","result":0,"tags":["bbh"],"time":4555,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.245.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.246.correctness","result":0,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harb
or.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.247.correctness","result":0,"tags":["bbh"],"time":65,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.248.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.249.correctness","result":1,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.250.correctness","result":0,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.251.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-in
struct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.252.correctness","result":0,"tags":["bbh"],"time":176,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.253.correctness","result":0,"tags":["bbh"],"time":71,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.254.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.255.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.256.correctness","result":1,"tags":["bbh"]
,"time":91,"llm.model":"llama3.2:1b-instruct-q8_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.1.correctness","result":0,"tags":["bbh"],"time":1872,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.2.correctness","result":0,"tags":["bbh"],"time":1252,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.3.correctness","result":0,"tags":["bbh"],"time":1526,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.4.correctness","result":0,"tags":["bbh"],"time":1335,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.5.corre
ctness","result":0,"tags":["bbh"],"time":350,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.6.correctness","result":1,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.7.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.8.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.9.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-b
bh_256"},{"id":"task.10.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.11.correctness","result":1,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.12.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.13.correctness","result":0,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.14.correctness","result":0,"tags":["bbh"],"time":82,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name"
:"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.15.correctness","result":0,"tags":["bbh"],"time":164,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.16.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.17.correctness","result":1,"tags":["bbh"],"time":181,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.18.correctness","result":1,"tags":["bbh"],"time":160,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19.correctness","result":0,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.te
mperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.20.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.21.correctness","result":1,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.22.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.23.correctness","result":0,"tags":["bbh"],"time":192,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.24.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api"
,"judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.25.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.26.correctness","result":0,"tags":["bbh"],"time":60,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.27.correctness","result":0,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.28.correctness","result":0,"tags":["bbh"],"time":83,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.29.correctness","result":0,"tags":["bbh"],"time":834,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.api
Url":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.30.correctness","result":1,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.31.correctness","result":0,"tags":["bbh"],"time":517,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.32.correctness","result":0,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.33.correctness","result":0,"tags":["bbh"],"time":149,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.34.correctness","result":0,"tags":["bbh"],"time":68,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/
llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.35.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.36.correctness","result":0,"tags":["bbh"],"time":168,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.37.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.38.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.39.correctness","result":1,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temper
ature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.40.correctness","result":0,"tags":["bbh"],"time":95,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.41.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.42.correctness","result":0,"tags":["bbh"],"time":183,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.43.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.44.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_toke
ns":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.45.correctness","result":1,"tags":["bbh"],"time":85,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.46.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.47.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.48.correctness","result":0,"tags":["bbh"],"time":120,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.49.correctness","result":0,"tags":["bbh"],"time":183,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://
harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.50.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.51.correctness","result":0,"tags":["bbh"],"time":131,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.52.correctness","result":1,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.53.correctness","result":1,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.54.correctness","result":0,"tags":["bbh"],"time":175,"llm.model":"llama3.2:1b-i
nstruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.55.correctness","result":0,"tags":["bbh"],"time":263,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.56.correctness","result":1,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.57.correctness","result":1,"tags":["bbh"],"time":89,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.58.correctness","result":1,"tags":["bbh"],"time":64,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.59.correctness","result":0,"tags":["bbh"],"tim
e":118,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.60.correctness","result":0,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.61.correctness","result":1,"tags":["bbh"],"time":122,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.62.correctness","result":1,"tags":["bbh"],"time":66,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.63.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.64.correctne
ss","result":1,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.65.correctness","result":0,"tags":["bbh"],"time":458,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.66.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.67.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.68.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2
-bbh_256"},{"id":"task.69.correctness","result":0,"tags":["bbh"],"time":167,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.70.correctness","result":0,"tags":["bbh"],"time":229,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.71.correctness","result":0,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.72.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.73.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"na
me":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.74.correctness","result":0,"tags":["bbh"],"time":181,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.75.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.76.correctness","result":0,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.77.correctness","result":0,"tags":["bbh"],"time":168,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.78.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.
temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.79.correctness","result":1,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.80.correctness","result":0,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.81.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.82.correctness","result":1,"tags":["bbh"],"time":93,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.83.correctness","result":0,"tags":["bbh"],"time":165,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api"
,"judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.84.correctness","result":0,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.85.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.86.correctness","result":0,"tags":["bbh"],"time":150,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.87.correctness","result":1,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.88.correctness","result":0,"tags":["bbh"],"time":231,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.a
piUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.89.correctness","result":0,"tags":["bbh"],"time":349,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.90.correctness","result":0,"tags":["bbh"],"time":153,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.91.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.92.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.93.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-lla
ma/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.94.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.95.correctness","result":0,"tags":["bbh"],"time":145,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.96.correctness","result":1,"tags":["bbh"],"time":132,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.97.correctness","result":1,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.98.correctness","result":1,"tags":["bbh"],"time":152,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.tem
perature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.99.correctness","result":1,"tags":["bbh"],"time":64,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.100.correctness","result":0,"tags":["bbh"],"time":588,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.101.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.102.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.103.correctness","result":0,"tags":["bbh"],"time":8471,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.
max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.104.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.105.correctness","result":0,"tags":["bbh"],"time":317,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.106.correctness","result":0,"tags":["bbh"],"time":125,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.107.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.108.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-fp16","llm.api
Url":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.109.correctness","result":0,"tags":["bbh"],"time":64,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.110.correctness","result":1,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.111.correctness","result":0,"tags":["bbh"],"time":180,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.112.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.113.correctness","result":0,"tags":["bbh"],"time":103,"llm.model
":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.114.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.115.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.116.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.117.correctness","result":1,"tags":["bbh"],"time":102,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.118.correctness","result
":1,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.119.correctness","result":0,"tags":["bbh"],"time":278,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.120.correctness","result":0,"tags":["bbh"],"time":64,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.121.correctness","result":1,"tags":["bbh"],"time":59,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.122.correctness","result":1,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"
},{"id":"task.123.correctness","result":1,"tags":["bbh"],"time":147,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.124.correctness","result":0,"tags":["bbh"],"time":73,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.125.correctness","result":1,"tags":["bbh"],"time":144,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.126.correctness","result":0,"tags":["bbh"],"time":165,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.127.correctness","result":0,"tags":["bbh"],"time":179,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":
"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.128.correctness","result":1,"tags":["bbh"],"time":73,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.129.correctness","result":1,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.130.correctness","result":1,"tags":["bbh"],"time":86,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.131.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.132.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.t
emperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.133.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.134.correctness","result":0,"tags":["bbh"],"time":167,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.135.correctness","result":1,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.136.correctness","result":1,"tags":["bbh"],"time":143,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.137.correctness","result":1,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai
/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.138.correctness","result":0,"tags":["bbh"],"time":257,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.139.correctness","result":1,"tags":["bbh"],"time":65,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.140.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.141.correctness","result":1,"tags":["bbh"],"time":136,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.142.correctness","result":1,"tags":["bbh"],"time":88,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct"
,"judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.143.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.144.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.145.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.146.correctness","result":0,"tags":["bbh"],"time":86,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.147.correctness","result":0,"tags":["bbh"],"time":127,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.mod
el":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.148.correctness","result":0,"tags":["bbh"],"time":168,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.149.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.150.correctness","result":0,"tags":["bbh"],"time":123,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.151.correctness","result":0,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.152.correctness","result":1,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm
.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.153.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.154.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.155.correctness","result":0,"tags":["bbh"],"time":122,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.156.correctness","result":0,"tags":["bbh"],"time":73,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.157.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.o
llama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.158.correctness","result":0,"tags":["bbh"],"time":86,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.159.correctness","result":1,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.160.correctness","result":1,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.161.correctness","result":0,"tags":["bbh"],"time":185,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.162.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instr
uct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.163.correctness","result":0,"tags":["bbh"],"time":117,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.164.correctness","result":1,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.165.correctness","result":1,"tags":["bbh"],"time":599,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.166.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.167.correctness","result":0,"tags":["bbh"],"
time":106,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.168.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.169.correctness","result":0,"tags":["bbh"],"time":83,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.170.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.171.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.172.co
rrectness","result":0,"tags":["bbh"],"time":149,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.173.correctness","result":1,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.174.correctness","result":0,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.175.correctness","result":1,"tags":["bbh"],"time":188,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.176.correctness","result":1,"tags":["bbh"],"time":63,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.
560Z-l3.2-bbh_256"},{"id":"task.177.correctness","result":0,"tags":["bbh"],"time":151,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.178.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.179.correctness","result":0,"tags":["bbh"],"time":162,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.180.correctness","result":0,"tags":["bbh"],"time":67,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.181.correctness","result":0,"tags":["bbh"],"time":59,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge
.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.182.correctness","result":0,"tags":["bbh"],"time":89,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.183.correctness","result":0,"tags":["bbh"],"time":446,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.184.correctness","result":0,"tags":["bbh"],"time":121,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.185.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.186.correctness","result":0,"tags":["bbh"],"time":189,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt"
:"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.187.correctness","result":1,"tags":["bbh"],"time":74,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.188.correctness","result":0,"tags":["bbh"],"time":158,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.189.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.190.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.191.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"ht
tps://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.192.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.193.correctness","result":0,"tags":["bbh"],"time":63,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.194.correctness","result":1,"tags":["bbh"],"time":141,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.195.correctness","result":0,"tags":["bbh"],"time":341,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.196.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/lla
ma-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.197.correctness","result":0,"tags":["bbh"],"time":61,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.198.correctness","result":0,"tags":["bbh"],"time":387,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.199.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.200.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.201.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.tempe
rature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.202.correctness","result":1,"tags":["bbh"],"time":96,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.203.correctness","result":0,"tags":["bbh"],"time":154,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.204.correctness","result":0,"tags":["bbh"],"time":79,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.205.correctness","result":1,"tags":["bbh"],"time":107,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.206.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max
_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.207.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.208.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.209.correctness","result":0,"tags":["bbh"],"time":135,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.210.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.211.correctness","result":0,"tags":["bbh"],"time":85,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl
":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.212.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.213.correctness","result":0,"tags":["bbh"],"time":180,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.214.correctness","result":0,"tags":["bbh"],"time":131,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.215.correctness","result":1,"tags":["bbh"],"time":97,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.216.correctness","result":0,"tags":["bbh"],"time":86,"llm.model":"
llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.217.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.218.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.219.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.220.correctness","result":0,"tags":["bbh"],"time":194,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.221.correctness","result":1
,"tags":["bbh"],"time":113,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.222.correctness","result":0,"tags":["bbh"],"time":131,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.223.correctness","result":1,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.224.correctness","result":1,"tags":["bbh"],"time":115,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.225.correctness","result":1,"tags":["bbh"],"time":148,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"}
,{"id":"task.226.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.227.correctness","result":0,"tags":["bbh"],"time":90,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.228.correctness","result":0,"tags":["bbh"],"time":675,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.229.correctness","result":0,"tags":["bbh"],"time":168,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.230.correctness","result":1,"tags":["bbh"],"time":194,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"
2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.231.correctness","result":1,"tags":["bbh"],"time":116,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.232.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.233.correctness","result":0,"tags":["bbh"],"time":474,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.234.correctness","result":0,"tags":["bbh"],"time":75,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.235.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.
temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.236.correctness","result":0,"tags":["bbh"],"time":92,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.237.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.238.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.239.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.240.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.a
i/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.241.correctness","result":0,"tags":["bbh"],"time":172,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.242.correctness","result":0,"tags":["bbh"],"time":66,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.243.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.244.correctness","result":0,"tags":["bbh"],"time":6928,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.245.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instru
ct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.246.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.247.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.248.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.249.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.250.correctness","result":0,"tags":["bbh"],"time":87,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.m
odel":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.251.correctness","result":1,"tags":["bbh"],"time":104,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.252.correctness","result":0,"tags":["bbh"],"time":175,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.253.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.254.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.255.correctness","result":1,"tags":["bbh"],"time":111,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"l
lm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.256.correctness","result":1,"tags":["bbh"],"time":89,"llm.model":"llama3.2:1b-instruct-fp16","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.1.correctness","result":0,"tags":["bbh"],"time":12399,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.2.correctness","result":0,"tags":["bbh"],"time":1432,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.3.correctness","result":1,"tags":["bbh"],"time":1320,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.4.correctness","result":0,"tags":["bbh"],"time":1586,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ol
lama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.5.correctness","result":0,"tags":["bbh"],"time":367,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.6.correctness","result":1,"tags":["bbh"],"time":187,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.7.correctness","result":0,"tags":["bbh"],"time":177,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.8.correctness","result":0,"tags":["bbh"],"time":186,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.9.correctness","result":0,"tags":["bbh"],"time":213,"llm.model":"llama3.2:3b-instruct-q2_K",
"llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.10.correctness","result":0,"tags":["bbh"],"time":208,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.11.correctness","result":0,"tags":["bbh"],"time":175,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.12.correctness","result":0,"tags":["bbh"],"time":189,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.13.correctness","result":1,"tags":["bbh"],"time":226,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.14.correctness","result":0,"tags":["bbh"],"time":252,"llm.
model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.15.correctness","result":1,"tags":["bbh"],"time":142,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.16.correctness","result":0,"tags":["bbh"],"time":450,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.17.correctness","result":0,"tags":["bbh"],"time":197,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.18.correctness","result":1,"tags":["bbh"],"time":153,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19.correctness","result
":1,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.20.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.21.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.22.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.23.correctness","result":0,"tags":["bbh"],"time":161,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},
{"id":"task.24.correctness","result":0,"tags":["bbh"],"time":171,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.25.correctness","result":0,"tags":["bbh"],"time":162,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.26.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.27.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.28.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-
09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.29.correctness","result":0,"tags":["bbh"],"time":1068,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.30.correctness","result":1,"tags":["bbh"],"time":145,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.31.correctness","result":0,"tags":["bbh"],"time":544,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.32.correctness","result":1,"tags":["bbh"],"time":117,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.33.correctness","result":0,"tags":["bbh"],"time":192,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperat
ure":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.34.correctness","result":1,"tags":["bbh"],"time":109,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.35.correctness","result":0,"tags":["bbh"],"time":166,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.36.correctness","result":0,"tags":["bbh"],"time":226,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.37.correctness","result":1,"tags":["bbh"],"time":192,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.38.correctness","result":0,"tags":["bbh"],"time":205,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judg
e.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.39.correctness","result":0,"tags":["bbh"],"time":69,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.40.correctness","result":0,"tags":["bbh"],"time":122,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.41.correctness","result":0,"tags":["bbh"],"time":178,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.42.correctness","result":0,"tags":["bbh"],"time":151,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.43.correctness","result":0,"tags":["bbh"],"time":262,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl"
:"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.44.correctness","result":0,"tags":["bbh"],"time":165,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.45.correctness","result":0,"tags":["bbh"],"time":244,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.46.correctness","result":0,"tags":["bbh"],"time":393,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.47.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.48.correctness","result":0,"tags":["bbh"],"time":209,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/lla
ma-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.49.correctness","result":0,"tags":["bbh"],"time":157,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.50.correctness","result":0,"tags":["bbh"],"time":148,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.51.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.52.correctness","result":0,"tags":["bbh"],"time":192,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.53.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperatur
e":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.54.correctness","result":1,"tags":["bbh"],"time":176,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.55.correctness","result":0,"tags":["bbh"],"time":329,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.56.correctness","result":1,"tags":["bbh"],"time":171,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.57.correctness","result":1,"tags":["bbh"],"time":180,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.58.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens"
:1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.59.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.60.correctness","result":0,"tags":["bbh"],"time":238,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.61.correctness","result":1,"tags":["bbh"],"time":150,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.62.correctness","result":0,"tags":["bbh"],"time":172,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.63.correctness","result":1,"tags":["bbh"],"time":235,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://ha
rbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.64.correctness","result":1,"tags":["bbh"],"time":186,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.65.correctness","result":0,"tags":["bbh"],"time":567,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.66.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.67.correctness","result":0,"tags":["bbh"],"time":154,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.68.correctness","result":1,"tags":["bbh"],"time":203,"llm.model":"llama3.2:3b-ins
truct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.69.correctness","result":1,"tags":["bbh"],"time":151,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.70.correctness","result":0,"tags":["bbh"],"time":223,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.71.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.72.correctness","result":1,"tags":["bbh"],"time":124,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.73.correctness","result":0,"tags":["bbh"],"tim
e":268,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.74.correctness","result":0,"tags":["bbh"],"time":157,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.75.correctness","result":1,"tags":["bbh"],"time":91,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.76.correctness","result":1,"tags":["bbh"],"time":150,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.77.correctness","result":1,"tags":["bbh"],"time":107,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.78.correctne
ss","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.79.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.80.correctness","result":1,"tags":["bbh"],"time":112,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.81.correctness","result":1,"tags":["bbh"],"time":155,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.82.correctness","result":1,"tags":["bbh"],"time":106,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2
-bbh_256"},{"id":"task.83.correctness","result":1,"tags":["bbh"],"time":184,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.84.correctness","result":0,"tags":["bbh"],"time":164,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.85.correctness","result":0,"tags":["bbh"],"time":324,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.86.correctness","result":0,"tags":["bbh"],"time":283,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.87.correctness","result":0,"tags":["bbh"],"time":148,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"n
ame":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.88.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.89.correctness","result":0,"tags":["bbh"],"time":468,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.90.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.91.correctness","result":0,"tags":["bbh"],"time":202,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.92.correctness","result":0,"tags":["bbh"],"time":153,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judg
e.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.93.correctness","result":0,"tags":["bbh"],"time":188,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.94.correctness","result":1,"tags":["bbh"],"time":145,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.95.correctness","result":0,"tags":["bbh"],"time":502,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.96.correctness","result":0,"tags":["bbh"],"time":3450,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.97.correctness","result":1,"tags":["bbh"],"time":141,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai
/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.98.correctness","result":0,"tags":["bbh"],"time":162,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.99.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.100.correctness","result":0,"tags":["bbh"],"time":217,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.101.correctness","result":0,"tags":["bbh"],"time":278,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.102.correctness","result":1,"tags":["bbh"],"time":157,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct"
,"judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.103.correctness","result":0,"tags":["bbh"],"time":246,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.104.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.105.correctness","result":0,"tags":["bbh"],"time":322,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.106.correctness","result":0,"tags":["bbh"],"time":185,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.107.correctness","result":0,"tags":["bbh"],"time":201,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.mo
del":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.108.correctness","result":0,"tags":["bbh"],"time":199,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.109.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.110.correctness","result":0,"tags":["bbh"],"time":155,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.111.correctness","result":0,"tags":["bbh"],"time":151,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.112.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"ll
m.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.113.correctness","result":0,"tags":["bbh"],"time":261,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.114.correctness","result":0,"tags":["bbh"],"time":221,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.115.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.116.correctness","result":1,"tags":["bbh"],"time":151,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.117.correctness","result":0,"tags":["bbh"],"time":237,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor
.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.118.correctness","result":1,"tags":["bbh"],"time":146,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.119.correctness","result":1,"tags":["bbh"],"time":123,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.120.correctness","result":0,"tags":["bbh"],"time":219,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.121.correctness","result":1,"tags":["bbh"],"time":116,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.122.correctness","result":0,"tags":["bbh"],"time":170,"llm.model":"llama3.2:3b-in
struct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.123.correctness","result":0,"tags":["bbh"],"time":500,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.124.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.125.correctness","result":1,"tags":["bbh"],"time":197,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.126.correctness","result":1,"tags":["bbh"],"time":152,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.127.correctness","result":0,"tags":["bbh"
],"time":162,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.128.correctness","result":0,"tags":["bbh"],"time":157,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.129.correctness","result":0,"tags":["bbh"],"time":188,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.130.correctness","result":1,"tags":["bbh"],"time":282,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.131.correctness","result":0,"tags":["bbh"],"time":161,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.1
32.correctness","result":1,"tags":["bbh"],"time":156,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.133.correctness","result":1,"tags":["bbh"],"time":168,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.134.correctness","result":0,"tags":["bbh"],"time":200,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.135.correctness","result":1,"tags":["bbh"],"time":202,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.136.correctness","result":1,"tags":["bbh"],"time":185,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19
:12:22.560Z-l3.2-bbh_256"},{"id":"task.137.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.138.correctness","result":0,"tags":["bbh"],"time":204,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.139.correctness","result":0,"tags":["bbh"],"time":140,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.140.correctness","result":1,"tags":["bbh"],"time":312,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.141.correctness","result":1,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature"
:0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.142.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.143.correctness","result":1,"tags":["bbh"],"time":182,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.144.correctness","result":1,"tags":["bbh"],"time":77,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.145.correctness","result":1,"tags":["bbh"],"time":105,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.146.correctness","result":0,"tags":["bbh"],"time":131,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judg
e.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.147.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.148.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.149.correctness","result":1,"tags":["bbh"],"time":216,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.150.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.151.correctness","result":0,"tags":["bbh"],"time":173,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.a
piUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.152.correctness","result":1,"tags":["bbh"],"time":269,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.153.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.154.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.155.correctness","result":0,"tags":["bbh"],"time":144,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.156.correctness","result":1,"tags":["bbh"],"time":230,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"met
a-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.157.correctness","result":0,"tags":["bbh"],"time":157,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.158.correctness","result":1,"tags":["bbh"],"time":636,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.159.correctness","result":1,"tags":["bbh"],"time":131,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.160.correctness","result":1,"tags":["bbh"],"time":106,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.161.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":4
2,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.162.correctness","result":0,"tags":["bbh"],"time":227,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.163.correctness","result":0,"tags":["bbh"],"time":1067,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.164.correctness","result":1,"tags":["bbh"],"time":124,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.165.correctness","result":0,"tags":["bbh"],"time":750,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.166.correctness","result":1,"tags":["bbh"],"time":145,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:
11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.167.correctness","result":0,"tags":["bbh"],"time":221,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.168.correctness","result":0,"tags":["bbh"],"time":150,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.169.correctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.170.correctness","result":0,"tags":["bbh"],"time":125,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.171.correctness","result":0,"tags":["bbh"],"time":197,"llm.model":"llama3.2:3b-instruct-q
2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.172.correctness","result":0,"tags":["bbh"],"time":288,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.173.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.174.correctness","result":0,"tags":["bbh"],"time":282,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.175.correctness","result":0,"tags":["bbh"],"time":147,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.176.correctness","result":0,"tags":["bbh"],"time"
:513,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.177.correctness","result":1,"tags":["bbh"],"time":213,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.178.correctness","result":0,"tags":["bbh"],"time":111,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.179.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.180.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.181.corre
ctness","result":0,"tags":["bbh"],"time":142,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.182.correctness","result":0,"tags":["bbh"],"time":242,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.183.correctness","result":0,"tags":["bbh"],"time":882,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.184.correctness","result":0,"tags":["bbh"],"time":902,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.185.correctness","result":0,"tags":["bbh"],"time":123,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.5
60Z-l3.2-bbh_256"},{"id":"task.186.correctness","result":0,"tags":["bbh"],"time":183,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.187.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.188.correctness","result":0,"tags":["bbh"],"time":160,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.189.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.190.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judg
e.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.191.correctness","result":1,"tags":["bbh"],"time":64,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.192.correctness","result":0,"tags":["bbh"],"time":172,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.193.correctness","result":0,"tags":["bbh"],"time":84,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.194.correctness","result":0,"tags":["bbh"],"time":297,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.195.correctness","result":1,"tags":["bbh"],"time":976,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt"
:"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.196.correctness","result":0,"tags":["bbh"],"time":185,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.197.correctness","result":1,"tags":["bbh"],"time":113,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.198.correctness","result":0,"tags":["bbh"],"time":206,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.199.correctness","result":0,"tags":["bbh"],"time":278,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.200.correctness","result":1,"tags":["bbh"],"time":133,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"h
ttps://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.201.correctness","result":1,"tags":["bbh"],"time":688,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.202.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.203.correctness","result":0,"tags":["bbh"],"time":155,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.204.correctness","result":1,"tags":["bbh"],"time":260,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.205.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/l
lama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.206.correctness","result":0,"tags":["bbh"],"time":117,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.207.correctness","result":0,"tags":["bbh"],"time":119,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.208.correctness","result":0,"tags":["bbh"],"time":155,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.209.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.210.correctness","result":0,"tags":["bbh"],"time":380,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.te
mperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.211.correctness","result":1,"tags":["bbh"],"time":196,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.212.correctness","result":1,"tags":["bbh"],"time":156,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.213.correctness","result":0,"tags":["bbh"],"time":168,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.214.correctness","result":1,"tags":["bbh"],"time":93,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.215.correctness","result":0,"tags":["bbh"],"time":218,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm
.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.216.correctness","result":0,"tags":["bbh"],"time":229,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.217.correctness","result":0,"tags":["bbh"],"time":315,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.218.correctness","result":0,"tags":["bbh"],"time":178,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.219.correctness","result":1,"tags":["bbh"],"time":147,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.220.correctness","result":0,"tags":["bbh"],"time":179,"llm.model":"llama3.2:3b-instruct-q2_K","llm.a
piUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.221.correctness","result":1,"tags":["bbh"],"time":121,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.222.correctness","result":0,"tags":["bbh"],"time":358,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.223.correctness","result":0,"tags":["bbh"],"time":149,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.224.correctness","result":1,"tags":["bbh"],"time":189,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.225.correctness","result":0,"tags":["bbh"],"time":185,"llm.m
odel":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.226.correctness","result":1,"tags":["bbh"],"time":109,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.227.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.228.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.229.correctness","result":0,"tags":["bbh"],"time":8267,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.230.correctness","r
esult":1,"tags":["bbh"],"time":200,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.231.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.232.correctness","result":1,"tags":["bbh"],"time":723,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.233.correctness","result":0,"tags":["bbh"],"time":8160,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.234.correctness","result":1,"tags":["bbh"],"time":172,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-
bbh_256"},{"id":"task.235.correctness","result":0,"tags":["bbh"],"time":169,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.236.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.237.correctness","result":0,"tags":["bbh"],"time":179,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.238.correctness","result":0,"tags":["bbh"],"time":161,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.239.correctness","result":1,"tags":["bbh"],"time":143,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":4
2,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.240.correctness","result":1,"tags":["bbh"],"time":165,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.241.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.242.correctness","result":1,"tags":["bbh"],"time":193,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.243.correctness","result":1,"tags":["bbh"],"time":104,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.244.correctness","result":0,"tags":["bbh"],"time":563,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"defau
lt","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.245.correctness","result":0,"tags":["bbh"],"time":192,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.246.correctness","result":0,"tags":["bbh"],"time":155,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.247.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.248.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.249.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://
openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.250.correctness","result":0,"tags":["bbh"],"time":145,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.251.correctness","result":1,"tags":["bbh"],"time":140,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.252.correctness","result":0,"tags":["bbh"],"time":175,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.253.correctness","result":0,"tags":["bbh"],"time":186,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.254.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.
1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.255.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.256.correctness","result":1,"tags":["bbh"],"time":102,"llm.model":"llama3.2:3b-instruct-q2_K","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.1.correctness","result":0,"tags":["bbh"],"time":2647,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.2.correctness","result":0,"tags":["bbh"],"time":2060,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.3.correctness","result":1,"tags":["bbh"],"time":2094,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature"
:0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.4.correctness","result":0,"tags":["bbh"],"time":2191,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.5.correctness","result":0,"tags":["bbh"],"time":473,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.6.correctness","result":1,"tags":["bbh"],"time":369,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.7.correctness","result":0,"tags":["bbh"],"time":112,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.8.correctness","result":0,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,
"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.9.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.10.correctness","result":0,"tags":["bbh"],"time":171,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.11.correctness","result":1,"tags":["bbh"],"time":508,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.12.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.13.correctness","result":0,"tags":["bbh"],"time":191,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ol
lama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.14.correctness","result":0,"tags":["bbh"],"time":132,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.15.correctness","result":1,"tags":["bbh"],"time":135,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.16.correctness","result":0,"tags":["bbh"],"time":87,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.17.correctness","result":0,"tags":["bbh"],"time":173,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.18.correctness","result":1,"tags":["bbh"],"time":149,"llm.model":"llama3.2:3b-instruct-q4
_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.19.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.20.correctness","result":0,"tags":["bbh"],"time":144,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.21.correctness","result":1,"tags":["bbh"],"time":107,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.22.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.23.correctness","result":1,"tags":["bbh"],"time":453,"
llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.24.correctness","result":1,"tags":["bbh"],"time":213,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.25.correctness","result":0,"tags":["bbh"],"time":125,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.26.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.27.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.28.correctness","re
sult":0,"tags":["bbh"],"time":176,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.29.correctness","result":0,"tags":["bbh"],"time":1022,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.30.correctness","result":1,"tags":["bbh"],"time":131,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.31.correctness","result":0,"tags":["bbh"],"time":601,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.32.correctness","result":1,"tags":["bbh"],"time":131,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_2
56"},{"id":"task.33.correctness","result":0,"tags":["bbh"],"time":324,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.34.correctness","result":0,"tags":["bbh"],"time":109,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.35.correctness","result":1,"tags":["bbh"],"time":381,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.36.correctness","result":0,"tags":["bbh"],"time":205,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.37.correctness","result":1,"tags":["bbh"],"time":163,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"
2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.38.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.39.correctness","result":1,"tags":["bbh"],"time":120,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.40.correctness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.41.correctness","result":0,"tags":["bbh"],"time":322,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.42.correctness","result":0,"tags":["bbh"],"time":162,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temp
erature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.43.correctness","result":0,"tags":["bbh"],"time":100,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.44.correctness","result":0,"tags":["bbh"],"time":492,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.45.correctness","result":0,"tags":["bbh"],"time":141,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.46.correctness","result":0,"tags":["bbh"],"time":126,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.47.correctness","result":1,"tags":["bbh"],"time":67,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","j
udge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.48.correctness","result":0,"tags":["bbh"],"time":165,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.49.correctness","result":0,"tags":["bbh"],"time":184,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.50.correctness","result":0,"tags":["bbh"],"time":166,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.51.correctness","result":0,"tags":["bbh"],"time":407,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.52.correctness","result":1,"tags":["bbh"],"time":125,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.api
Url":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.53.correctness","result":1,"tags":["bbh"],"time":135,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.54.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.55.correctness","result":0,"tags":["bbh"],"time":376,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.56.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.57.correctness","result":1,"tags":["bbh"],"time":171,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama
/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.58.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.59.correctness","result":1,"tags":["bbh"],"time":170,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.60.correctness","result":0,"tags":["bbh"],"time":116,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.61.correctness","result":0,"tags":["bbh"],"time":132,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.62.correctness","result":0,"tags":["bbh"],"time":81,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temper
ature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.63.correctness","result":0,"tags":["bbh"],"time":136,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.64.correctness","result":0,"tags":["bbh"],"time":188,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.65.correctness","result":0,"tags":["bbh"],"time":457,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.66.correctness","result":1,"tags":["bbh"],"time":192,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.67.correctness","result":0,"tags":["bbh"],"time":146,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tok
ens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.68.correctness","result":1,"tags":["bbh"],"time":109,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.69.correctness","result":0,"tags":["bbh"],"time":178,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.70.correctness","result":0,"tags":["bbh"],"time":193,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.71.correctness","result":0,"tags":["bbh"],"time":130,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.72.correctness","result":1,"tags":["bbh"],"time":270,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http:
//harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.73.correctness","result":0,"tags":["bbh"],"time":105,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.74.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.75.correctness","result":1,"tags":["bbh"],"time":140,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.76.correctness","result":1,"tags":["bbh"],"time":139,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.77.correctness","result":0,"tags":["bbh"],"time":62,"llm.model":"llama3.2:3b-
instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.78.correctness","result":0,"tags":["bbh"],"time":61,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.79.correctness","result":0,"tags":["bbh"],"time":144,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.80.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.81.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.82.correctness","result":1,"tags":["bbh"],"ti
me":136,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.83.correctness","result":0,"tags":["bbh"],"time":101,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.84.correctness","result":0,"tags":["bbh"],"time":127,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.85.correctness","result":0,"tags":["bbh"],"time":166,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.86.correctness","result":1,"tags":["bbh"],"time":134,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.87.correct
ness","result":0,"tags":["bbh"],"time":515,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.88.correctness","result":0,"tags":["bbh"],"time":167,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.89.correctness","result":0,"tags":["bbh"],"time":312,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.90.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.91.correctness","result":0,"tags":["bbh"],"time":144,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.
2-bbh_256"},{"id":"task.92.correctness","result":0,"tags":["bbh"],"time":155,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.93.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.94.correctness","result":1,"tags":["bbh"],"time":103,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.95.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.96.correctness","result":1,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"
name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.97.correctness","result":1,"tags":["bbh"],"time":128,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.98.correctness","result":1,"tags":["bbh"],"time":176,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.99.correctness","result":1,"tags":["bbh"],"time":307,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.100.correctness","result":0,"tags":["bbh"],"time":278,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.101.correctness","result":0,"tags":["bbh"],"time":293,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","j
udge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.102.correctness","result":0,"tags":["bbh"],"time":268,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.103.correctness","result":0,"tags":["bbh"],"time":124,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.104.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.105.correctness","result":0,"tags":["bbh"],"time":367,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.106.correctness","result":0,"tags":["bbh"],"time":193,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openro
uter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.107.correctness","result":1,"tags":["bbh"],"time":126,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.108.correctness","result":1,"tags":["bbh"],"time":168,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.109.correctness","result":0,"tags":["bbh"],"time":170,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.110.correctness","result":1,"tags":["bbh"],"time":208,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.111.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-
instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.112.correctness","result":0,"tags":["bbh"],"time":153,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.113.correctness","result":0,"tags":["bbh"],"time":72,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.114.correctness","result":0,"tags":["bbh"],"time":89,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.115.correctness","result":0,"tags":["bbh"],"time":161,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.116.correctness","result":0,"tags":["bbh"],"time":483,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"j
udge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.117.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.118.correctness","result":1,"tags":["bbh"],"time":213,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.119.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.120.correctness","result":1,"tags":["bbh"],"time":150,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.121.correctness","result":1,"tags":["bbh"],"time":137,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1
024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.122.correctness","result":1,"tags":["bbh"],"time":338,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.123.correctness","result":0,"tags":["bbh"],"time":396,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.124.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.125.correctness","result":1,"tags":["bbh"],"time":184,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.126.correctness","result":1,"tags":["bbh"],"time":254,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http:/
/harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.127.correctness","result":0,"tags":["bbh"],"time":433,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.128.correctness","result":0,"tags":["bbh"],"time":96,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.129.correctness","result":0,"tags":["bbh"],"time":123,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.130.correctness","result":1,"tags":["bbh"],"time":87,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.131.correctness","result":0,"tags":["bbh"],"time":152,"llm.model":"llama3.2:
3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.132.correctness","result":0,"tags":["bbh"],"time":386,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.133.correctness","result":1,"tags":["bbh"],"time":354,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.134.correctness","result":0,"tags":["bbh"],"time":160,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.135.correctness","result":1,"tags":["bbh"],"time":145,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.136.correctness","result":0,"tags":[
"bbh"],"time":120,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.137.correctness","result":0,"tags":["bbh"],"time":83,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.138.correctness","result":0,"tags":["bbh"],"time":134,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.139.correctness","result":1,"tags":["bbh"],"time":232,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.140.correctness","result":1,"tags":["bbh"],"time":227,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"ta
sk.141.correctness","result":1,"tags":["bbh"],"time":149,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.142.correctness","result":1,"tags":["bbh"],"time":104,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.143.correctness","result":0,"tags":["bbh"],"time":266,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.144.correctness","result":0,"tags":["bbh"],"time":99,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.145.correctness","result":1,"tags":["bbh"],"time":100,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25
T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.146.correctness","result":0,"tags":["bbh"],"time":94,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.147.correctness","result":0,"tags":["bbh"],"time":110,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.148.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.149.correctness","result":1,"tags":["bbh"],"time":168,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.150.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperatur
e":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.151.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.152.correctness","result":1,"tags":["bbh"],"time":171,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.153.correctness","result":0,"tags":["bbh"],"time":300,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.154.correctness","result":0,"tags":["bbh"],"time":103,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.155.correctness","result":1,"tags":["bbh"],"time":187,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","j
udge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.156.correctness","result":0,"tags":["bbh"],"time":851,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.157.correctness","result":0,"tags":["bbh"],"time":160,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.158.correctness","result":0,"tags":["bbh"],"time":104,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.159.correctness","result":1,"tags":["bbh"],"time":87,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.160.correctness","result":0,"tags":["bbh"],"time":121,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge
.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.161.correctness","result":0,"tags":["bbh"],"time":95,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.162.correctness","result":0,"tags":["bbh"],"time":182,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.163.correctness","result":0,"tags":["bbh"],"time":162,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.164.correctness","result":1,"tags":["bbh"],"time":130,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.165.correctness","result":0,"tags":["bbh"],"time":633,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"me
ta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.166.correctness","result":1,"tags":["bbh"],"time":108,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.167.correctness","result":0,"tags":["bbh"],"time":137,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.168.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.169.correctness","result":0,"tags":["bbh"],"time":166,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.170.correctness","result":1,"tags":["bbh"],"time":102,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":
42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.171.correctness","result":1,"tags":["bbh"],"time":101,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.172.correctness","result":0,"tags":["bbh"],"time":183,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.173.correctness","result":0,"tags":["bbh"],"time":98,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.174.correctness","result":0,"tags":["bbh"],"time":156,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.175.correctness","result":0,"tags":["bbh"],"time":123,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:1
1434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.176.correctness","result":0,"tags":["bbh"],"time":256,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.177.correctness","result":1,"tags":["bbh"],"time":139,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.178.correctness","result":1,"tags":["bbh"],"time":128,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.179.correctness","result":0,"tags":["bbh"],"time":118,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.180.correctness","result":0,"tags":["bbh"],"time":128,"llm.model":"llama3.2:3b-instruct-q4
_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.181.correctness","result":1,"tags":["bbh"],"time":225,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.182.correctness","result":0,"tags":["bbh"],"time":173,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.183.correctness","result":0,"tags":["bbh"],"time":699,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.184.correctness","result":0,"tags":["bbh"],"time":139,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.185.correctness","result":0,"tags":["bbh"],"time":
134,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.186.correctness","result":0,"tags":["bbh"],"time":218,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.187.correctness","result":1,"tags":["bbh"],"time":105,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.188.correctness","result":0,"tags":["bbh"],"time":431,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.189.correctness","result":0,"tags":["bbh"],"time":114,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.190.correc
tness","result":0,"tags":["bbh"],"time":106,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.191.correctness","result":1,"tags":["bbh"],"time":107,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.192.correctness","result":0,"tags":["bbh"],"time":129,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.193.correctness","result":0,"tags":["bbh"],"time":121,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.194.correctness","result":0,"tags":["bbh"],"time":64,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560
Z-l3.2-bbh_256"},{"id":"task.195.correctness","result":0,"tags":["bbh"],"time":184,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.196.correctness","result":1,"tags":["bbh"],"time":187,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.197.correctness","result":1,"tags":["bbh"],"time":130,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.198.correctness","result":0,"tags":["bbh"],"time":205,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.199.correctness","result":0,"tags":["bbh"],"time":248,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.
seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.200.correctness","result":1,"tags":["bbh"],"time":183,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.201.correctness","result":0,"tags":["bbh"],"time":133,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.202.correctness","result":1,"tags":["bbh"],"time":117,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.203.correctness","result":0,"tags":["bbh"],"time":138,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.204.correctness","result":0,"tags":["bbh"],"time":113,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt"
:"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.205.correctness","result":1,"tags":["bbh"],"time":130,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.206.correctness","result":1,"tags":["bbh"],"time":118,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":"default","judge.temperature":0,"judge.seed":42,"name":"2024-09-25T19:12:22.560Z-l3.2-bbh_256"},{"id":"task.207.correctness","result":0,"tags":["bbh"],"time":107,"llm.model":"llama3.2:3b-instruct-q4_0","llm.apiUrl":"http://harbor.ollama:11434","llm.max_tokens":1024,"llm.seed":42,"llm.temperature":0,"judge.model":"meta-llama/llama-3.1-70b-instruct","judge.apiUrl":"https://openrouter.ai/api","judge.prompt":" |
View raw
(Sorry about that, but we can’t show files that are this big right now.)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment