Skip to content

Instantly share code, notes, and snippets.

@smartexpert
Created May 5, 2026 20:18
Show Gist options
  • Select an option

  • Save smartexpert/e18e91530034b306d4a81bd7fa58e7d0 to your computer and use it in GitHub Desktop.

Select an option

Save smartexpert/e18e91530034b306d4a81bd7fa58e7d0 to your computer and use it in GitHub Desktop.
OpenRouter × GPU Break-even Calculator
<!doctype html>
<html lang="en"><head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>OpenRouter × GPU Breakeven</title>
<script src="https://cdn.tailwindcss.com"></script>
<style>
:root { color-scheme: dark; }
html, body { background: #08090a; min-height: 100%; }
body { font-family: 'Inter', ui-sans-serif, system-ui, -apple-system, 'Segoe UI', Roboto, sans-serif; -webkit-font-smoothing: antialiased; }
.mono { font-family: 'JetBrains Mono', ui-monospace, SFMono-Regular, Menlo, Monaco, monospace; }
.glow { box-shadow: 0 0 0 1px rgba(255,255,255,.04), 0 1px 0 rgba(255,255,255,.02) inset; }
.grad-border { position: relative; }
.grad-border::before { content:""; position:absolute; inset:0; padding:1px; border-radius:inherit;
background: linear-gradient(135deg, rgba(118,131,255,.45), rgba(118,131,255,0) 40%, rgba(255,255,255,.04));
-webkit-mask: linear-gradient(#000 0 0) content-box, linear-gradient(#000 0 0); -webkit-mask-composite: xor; mask-composite: exclude; pointer-events:none;}
.verdict-buy { background: linear-gradient(135deg, rgba(34,197,94,.18), rgba(34,197,94,.04)); border-color: rgba(34,197,94,.4); }
.verdict-maybe { background: linear-gradient(135deg, rgba(234,179,8,.16), rgba(234,179,8,.03)); border-color: rgba(234,179,8,.4); }
.verdict-info { background: linear-gradient(135deg, rgba(120,120,140,.14), rgba(120,120,140,.03)); border-color: rgba(255,255,255,.10); }
.verdict-skip { background: linear-gradient(135deg, rgba(244,63,94,.16), rgba(244,63,94,.03)); border-color: rgba(244,63,94,.4); }
.pill { display:inline-flex; align-items:center; gap:.25rem; padding:.125rem .55rem; border-radius:9999px; font-size:.7rem; font-weight:500; }
input[type=range] { accent-color: #7683ff; }
details > summary { list-style: none; cursor: pointer; }
details > summary::-webkit-details-marker { display: none; }
th { font-weight: 500; color: #a1a1aa; font-size: .68rem; text-transform: uppercase; letter-spacing: .04em; }
.num { font-variant-numeric: tabular-nums; }
.row:hover { background: rgba(255,255,255,.025); }
.ghost-input { background: rgba(0,0,0,.35); border: 1px solid rgba(255,255,255,.06); }
.ghost-input:focus-within { border-color: rgba(118,131,255,.45); }
.preset-btn { transition: all .15s; }
.preset-btn[data-active="true"] { background: rgba(118,131,255,.18); color: #c7cfff; border-color: rgba(118,131,255,.5); }
.tab-btn[data-active="true"] { background: rgba(255,255,255,.06); color: #fff; }
.combobox-pop { box-shadow: 0 12px 40px rgba(0,0,0,.55); }
</style>
</head>
<body class="text-zinc-200">
<header class="border-b border-white/5">
<div class="max-w-[1180px] mx-auto px-6 py-5 flex items-center justify-between">
<div class="flex items-center gap-3">
<div class="h-8 w-8 rounded-lg grad-border glow flex items-center justify-center mono text-xs">≷</div>
<div>
<div class="text-sm font-medium tracking-tight">OpenRouter × GPU Breakeven</div>
<div class="text-[11px] text-zinc-500">2026-05-05 20:13 UTC · 374 models · 214 open-weight · anchor RTX 5070 Ti @ 110 t/s on Qwen3.5-9B Q4_K_M</div>
</div>
</div>
<div class="flex items-center gap-1 bg-zinc-900/60 grad-border glow rounded-lg p-1 text-xs">
<button data-tab="model" data-active="true" class="tab-btn rounded-md px-3 py-1.5 text-zinc-400">Pick a model</button>
<button data-tab="gpu" class="tab-btn rounded-md px-3 py-1.5 text-zinc-400">Evaluate a GPU</button>
<button data-tab="usage" class="tab-btn rounded-md px-3 py-1.5 text-zinc-400">My usage</button>
<button data-tab="browse" class="tab-btn rounded-md px-3 py-1.5 text-zinc-400">Browse all</button>
</div>
</div>
</header>
<main class="max-w-[1180px] mx-auto px-6 py-8 space-y-6">
<!-- ============================================================ MODEL TAB -->
<section data-pane="model" class="space-y-6">
<div class="rounded-2xl bg-zinc-900/40 grad-border glow p-6 space-y-6">
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">I want to run</div>
<div class="combobox" data-target="model-combo">
<div class="ghost-input rounded-xl px-4 py-3 flex items-center gap-3 cursor-text">
<span class="text-zinc-500 text-xs mono">model</span>
<input id="model-search" type="text" placeholder="qwen 3.5 9b…" autocomplete="off"
class="bg-transparent flex-1 text-lg font-medium outline-none mono">
<span id="model-meta" class="text-xs text-zinc-500"></span>
</div>
<div id="model-results" class="combobox-pop hidden mt-2 rounded-xl bg-zinc-950/95 grad-border max-h-72 overflow-y-auto"></div>
</div>
<div class="space-y-3">
<div class="flex items-center justify-between">
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Daily usage</div>
<div class="text-sm mono text-zinc-300"><span id="tpd-val"></span> output tok/day</div>
</div>
<input id="tpd" type="range" min="3.5" max="10" step="0.05" value="6" class="w-full">
<div class="flex flex-wrap gap-1.5 text-[11px] mono">
<button data-tpd="4" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🐢 casual · 10K</button>
<button data-tpd="5" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">💬 power · 100K</button>
<button data-tpd="6" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">⚡ heavy dev · 1M</button>
<button data-tpd="7" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🚀 small team · 10M</button>
<button data-tpd="8" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🏢 prod · 100M</button>
<button data-tpd="9" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🌐 scale · 1B</button>
</div>
</div>
<div class="space-y-3 pt-2">
<div class="flex items-center justify-between">
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Context to load</div>
<div class="text-sm mono text-zinc-300"><span id="ctx-val"></span> tokens</div>
</div>
<input id="ctx" type="range" min="10" max="20.5" step="0.05" value="13" class="w-full">
<div class="flex flex-wrap gap-1.5 text-[11px] mono">
<button data-ctx="12" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">4K</button>
<button data-ctx="13" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">8K</button>
<button data-ctx="15" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">32K</button>
<button data-ctx="17" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">128K</button>
<button data-ctx="18" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">256K</button>
<button data-ctx="max" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">model max</button>
</div>
<div id="ctx-warn" class="text-[11px] text-amber-300/80 hidden">Exceeds this model's max context</div>
</div>
<div class="space-y-3 pt-2">
<div class="flex items-center justify-between">
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">My budget</div>
<div class="text-[11px] text-zinc-500"><span id="budget-fits-count">—</span> GPUs in range</div>
</div>
<div class="ghost-input rounded-xl px-4 py-3 flex items-center gap-3">
<span class="text-zinc-500 text-sm">$</span>
<input id="budget" type="number" min="100" step="100" value="1000"
class="bg-transparent flex-1 text-2xl font-semibold mono outline-none">
<span class="text-xs text-zinc-500">USD</span>
</div>
<div class="flex flex-wrap gap-1.5 text-[11px] mono">
<button data-budget="500" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🪙 ≤$500</button>
<button data-budget="1000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">💵 ≤$1K</button>
<button data-budget="2000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">💼 ≤$2K</button>
<button data-budget="3000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🎯 ≤$3K</button>
<button data-budget="10000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">⚙️ workstation ≤$10K</button>
<button data-budget="40000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🏢 datacenter ≤$40K</button>
</div>
<label class="flex items-center gap-2 text-[11px] text-zinc-400 pt-1 cursor-pointer select-none">
<input id="include-dc" type="checkbox" class="accent-indigo-400">
<span>Include data-center GPUs (H100, A100, B200…)</span>
<span class="text-zinc-600">— off by default</span>
</label>
<div class="pt-2 border-t border-white/5 flex items-center justify-between gap-3 text-[11px]">
<div id="settings-strip" class="mono text-zinc-400 truncate">…</div>
<button id="open-tune" class="text-zinc-400 hover:text-indigo-300 underline decoration-dotted underline-offset-4 shrink-0">tune ↓</button>
</div>
</div>
</div>
<!-- Verdict strip (single row) -->
<div id="verdict-model" class="rounded-xl border-2 grad-border glow px-5 py-4"></div>
<details id="math-panel" class="rounded-xl bg-zinc-900/30 grad-border glow px-5 py-3">
<summary class="text-[11px] text-zinc-400 hover:text-zinc-200 select-none flex items-center justify-between">
<span>▸ Show the math · what's actually being computed</span>
<span class="text-zinc-600">click to expand</span>
</summary>
<pre id="math-body" class="mono text-[11px] text-zinc-300 leading-relaxed mt-3 whitespace-pre-wrap"></pre>
</details>
<!-- Ranked GPU answer table -->
<div class="rounded-xl bg-zinc-900/30 grad-border glow overflow-hidden">
<div class="px-5 py-3 border-b border-white/5 flex items-center justify-between">
<div class="text-sm font-medium">All GPUs ranked by price</div>
<div class="text-[11px] text-zinc-500" id="gpu-count">—</div>
</div>
<div class="overflow-x-auto"><table class="w-full text-sm">
<thead><tr>
<th class="text-left px-4 py-2 text-zinc-500">GPU</th>
<th data-sort="price" class="sort-th text-right px-4 py-2 cursor-pointer hover:text-zinc-300">Price</th>
<th class="text-right px-4 py-2">VRAM used</th>
<th data-sort="tps" title="Estimated single-stream output tokens/sec. Anchor: RTX 5070 Ti @ 110 t/s on Qwen3.5-9B Q4_K_M. Scaled by bandwidth ÷ active-param-bytes × engine multiplier. ±25% in practice." class="sort-th text-right px-4 py-2 cursor-pointer hover:text-zinc-300">TPS ⓘ</th>
<th data-sort="payback" title="Days until net API savings (after electricity) recoup the GPU sticker price. Excludes amortization." class="sort-th text-right px-4 py-2 cursor-pointer hover:text-zinc-300">Payback ⓘ</th>
<th data-sort="netyr" title="Year-1 P&L while owning. (API spend avoided − electricity − GPU amortized over N years) × 365." class="sort-th text-right px-4 py-2 cursor-pointer hover:text-zinc-300">Year-1 net ⓘ</th>
<th class="text-right px-4 py-2">Verdict</th>
</tr></thead>
<tbody id="gpu-table-body" class="divide-y divide-white/5"></tbody>
</table></div>
<div class="px-5 py-2 border-t border-white/5 flex items-center justify-between text-[11px] text-zinc-500">
<div id="hint-line"></div>
<label id="show-fails-label" class="flex items-center gap-2 cursor-pointer select-none"><input id="show-fails" type="checkbox" class="accent-indigo-400"><span id="show-fails-text">show GPUs that don't fit</span></label>
</div>
</div>
<!-- API alternative reference -->
<div class="rounded-2xl bg-zinc-900/30 grad-border glow overflow-hidden">
<div class="px-5 py-3 border-b border-white/5 flex items-center justify-between">
<div class="text-sm font-medium">If you stayed on the API</div>
<div class="text-[11px] text-zinc-500">cheapest provider for this model · plus closed-frontier reference</div>
</div>
<div id="api-table"></div>
</div>
</section>
<!-- ============================================================ GPU TAB -->
<section data-pane="gpu" class="space-y-6 hidden">
<div class="rounded-2xl bg-zinc-900/40 grad-border glow p-6 space-y-5">
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">I'm considering</div>
<select id="gpu-select" class="ghost-input rounded-xl px-4 py-3 text-lg font-medium outline-none w-full mono"></select>
<div class="space-y-2">
<div class="flex items-center justify-between">
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">at the price I can pay</div>
<div class="text-[11px] text-zinc-500" id="gpu-price-context"></div>
</div>
<div class="ghost-input rounded-xl px-4 py-3 flex items-center gap-3">
<span class="text-zinc-500 text-sm">$</span>
<input id="gpu-price-headline" type="number" min="0" step="50"
class="bg-transparent flex-1 text-2xl font-semibold mono outline-none">
<span class="text-xs text-zinc-500">USD</span>
</div>
<div class="flex flex-wrap gap-1.5 text-[11px] mono">
<button data-priceset="msrp" class="gpu-priceset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">use MSRP</button>
<button data-priceset="street" class="gpu-priceset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">use street</button>
<button data-priceset="reset" class="gpu-priceset-btn rounded-full border border-white/10 px-2.5 py-1 text-rose-300/70">reset override</button>
</div>
</div>
<div id="gpu-summary" class="grid grid-cols-2 md:grid-cols-5 gap-3 text-xs"></div>
<div class="space-y-3 pt-2">
<div class="flex items-center justify-between">
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Daily usage</div>
<div class="text-sm mono text-zinc-300"><span id="tpd-val2"></span> output tok/day</div>
</div>
<input id="tpd2" type="range" min="3.5" max="10" step="0.05" value="6" class="w-full">
<div class="flex flex-wrap gap-1.5 text-[11px] mono">
<button data-tpd="4" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🐢 casual · 10K</button>
<button data-tpd="5" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">💬 power · 100K</button>
<button data-tpd="6" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">⚡ heavy · 1M</button>
<button data-tpd="7" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🚀 team · 10M</button>
<button data-tpd="8" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🏢 prod · 100M</button>
<button data-tpd="9" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🌐 scale · 1B</button>
</div>
</div>
<div class="space-y-3 pt-2">
<div class="flex items-center justify-between">
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Context per request</div>
<div class="text-sm mono text-zinc-300"><span id="ctx-val2"></span> tokens</div>
</div>
<input id="ctx2" type="range" min="10" max="20.5" step="0.05" value="13" class="w-full">
<div class="flex flex-wrap gap-1.5 text-[11px] mono">
<button data-ctx="12" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">4K</button>
<button data-ctx="13" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">8K</button>
<button data-ctx="15" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">32K</button>
<button data-ctx="17" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">128K</button>
<button data-ctx="18" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">256K</button>
</div>
</div>
</div>
<div id="verdict-gpu" class="rounded-2xl border-2 grad-border glow p-6 space-y-3"></div>
<div class="rounded-2xl bg-zinc-900/30 grad-border glow overflow-hidden">
<div class="px-5 py-3 border-b border-white/5 flex items-center justify-between gap-3 flex-wrap">
<div class="text-sm font-medium shrink-0">Models that fit</div>
<div class="flex items-center gap-2">
<input id="gpu-model-search" type="text" placeholder="filter…" class="ghost-input rounded-md px-2 py-1 mono text-[11px] w-32">
<select id="gpu-model-sort" class="ghost-input rounded-md px-2 py-1 mono text-[11px]">
<option value="created">Newest</option>
<option value="tps">Fastest TPS</option>
<option value="dailySave">Highest savings</option>
<option value="payback">Best payback</option>
<option value="params">Largest params</option>
</select>
<label class="flex items-center gap-1.5 text-[11px] text-zinc-400"><input id="gpu-mainstream-only" type="checkbox" checked class="accent-indigo-400">mainstream only</label>
</div>
</div>
<div id="gpu-models" class="overflow-x-auto"></div>
<div class="px-5 py-2 border-t border-white/5 text-[11px] text-zinc-500" id="gpu-models-footer"></div>
</div>
<div class="rounded-2xl bg-zinc-900/30 grad-border glow overflow-hidden">
<div class="px-5 py-3 border-b border-white/5 text-sm font-medium">Bigger GPUs to consider</div>
<div id="bigger-gpus" class="divide-y divide-white/5"></div>
</div>
</section>
<!-- ============================================================ USAGE TAB -->
<section data-pane="usage" class="space-y-6 hidden">
<div class="rounded-2xl bg-zinc-900/40 grad-border glow p-6 space-y-5">
<div class="flex items-start justify-between gap-4 flex-wrap">
<div>
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Connect your OpenRouter usage</div>
<div class="text-lg font-medium tracking-tight mt-1">Use your real spend instead of guessing</div>
<div class="text-xs text-zinc-500 mt-1">All three options run entirely in your browser. Your key never leaves this page.</div>
</div>
<div id="usage-status" class="text-xs text-zinc-400"></div>
</div>
<!-- Sub-tabs for import method -->
<div class="flex items-center gap-1 bg-zinc-950/60 rounded-lg p-1 text-xs">
<button data-utab="paste" data-active="true" class="utab-btn rounded-md px-3 py-1.5 text-zinc-400">① Paste / drop file</button>
<button data-utab="connect" class="utab-btn rounded-md px-3 py-1.5 text-zinc-400">② Connect with API key</button>
<button data-utab="bookmarklet" class="utab-btn rounded-md px-3 py-1.5 text-zinc-400">③ Bookmarklet</button>
</div>
<!-- ① Paste / drop -->
<div data-upane="paste" class="space-y-3">
<div class="text-xs text-zinc-400">Run this once in your terminal, then paste the result below or drop the file:</div>
<div class="flex items-stretch gap-2">
<code id="curl-snippet" class="flex-1 ghost-input rounded-md px-3 py-2 mono text-[11px] overflow-x-auto whitespace-nowrap">curl -s "https://openrouter.ai/api/v1/analytics" -H "Authorization: Bearer $OPENROUTER_API_KEY" &gt; usage.json</code>
<button id="copy-curl" class="ghost-input rounded-md px-3 text-xs hover:bg-white/5">copy</button>
</div>
<textarea id="usage-paste" placeholder='paste the full JSON response here (or drop the file anywhere on this card)' class="ghost-input rounded-md px-3 py-2 mono text-[11px] w-full h-40"></textarea>
<div class="flex gap-2">
<button id="usage-load" class="ghost-input rounded-md px-4 py-1.5 text-xs hover:bg-indigo-500/15">Load</button>
<input type="file" id="usage-file" accept=".json,application/json" class="hidden">
<button id="usage-pickfile" class="ghost-input rounded-md px-4 py-1.5 text-xs hover:bg-white/5">Pick file…</button>
</div>
</div>
<!-- ② Connect -->
<div data-upane="connect" class="space-y-3 hidden">
<div class="text-xs text-zinc-400">Paste your API key — it's stored only in your browser's localStorage and used to call <span class="mono">openrouter.ai</span> directly.</div>
<div class="flex gap-2">
<input id="api-key" type="password" placeholder="sk-or-v1-…" autocomplete="off" class="ghost-input rounded-md px-3 py-2 mono text-xs flex-1">
<button id="api-fetch" class="ghost-input rounded-md px-4 py-1.5 text-xs hover:bg-indigo-500/15">Fetch usage</button>
<button id="api-clear" class="ghost-input rounded-md px-3 py-1.5 text-xs hover:bg-rose-500/15 text-rose-300">Clear</button>
</div>
<div id="api-status" class="text-[11px] text-zinc-500"></div>
<div class="text-[10px] text-zinc-600">Hits <span class="mono">/api/v1/credits</span>, <span class="mono">/api/v1/auth/key</span>, <span class="mono">/api/v1/analytics</span>. Verify in DevTools — no other origins are contacted.</div>
</div>
<!-- ③ Bookmarklet -->
<div data-upane="bookmarklet" class="space-y-3 hidden">
<div class="text-xs text-zinc-400">Drag this link to your bookmarks bar. Then visit <span class="mono">openrouter.ai</span> while logged in and click the bookmark — it copies your usage to the clipboard. Paste it back here.</div>
<div class="flex items-center gap-3">
<a id="bookmarklet" class="ghost-input rounded-md px-4 py-2 text-xs hover:bg-indigo-500/15 cursor-grab" draggable="true">📊 Grab my OpenRouter usage</a>
<span class="text-[10px] text-zinc-500">drag to bookmarks bar →</span>
</div>
<textarea id="bm-paste" placeholder="…then paste the clipboard contents here" class="ghost-input rounded-md px-3 py-2 mono text-[11px] w-full h-32"></textarea>
<button id="bm-load" class="ghost-input rounded-md px-4 py-1.5 text-xs hover:bg-indigo-500/15">Load</button>
</div>
</div>
<!-- Aggregate KPIs -->
<div id="usage-kpis" class="grid grid-cols-1 md:grid-cols-3 gap-4 hidden"></div>
<!-- Open vs closed split -->
<div id="usage-split" class="grid grid-cols-1 md:grid-cols-2 gap-4 hidden"></div>
<!-- Per-model rows -->
<div id="usage-rows" class="rounded-2xl bg-zinc-900/30 grad-border glow overflow-hidden hidden">
<div class="px-5 py-3 border-b border-white/5 flex items-center justify-between">
<div class="text-sm font-medium">Per-model · what it cost vs what it would cost on local hardware</div>
<div id="usage-gpu-pick" class="text-[11px] text-zinc-500"></div>
</div>
<div id="usage-rows-body" class="divide-y divide-white/5"></div>
</div>
<div id="usage-empty" class="rounded-2xl bg-zinc-900/30 grad-border glow p-8 text-center text-sm text-zinc-500">
No usage data loaded yet · pick an option above
</div>
</section>
<!-- ============================================================ BROWSE TAB -->
<section data-pane="browse" class="grid grid-cols-12 gap-6 hidden">
<aside class="col-span-3 space-y-4">
<section class="rounded-xl bg-zinc-900/40 grad-border glow p-4 space-y-3 text-xs">
<div class="text-[11px] uppercase tracking-wider text-zinc-500">Browse · cost mode</div>
<div class="grid grid-cols-3 gap-1 mono">
<button data-mode="payback" data-active="true" class="mode-btn rounded-md px-2 py-1.5 text-zinc-400">Payback</button>
<button data-mode="amortized" class="mode-btn rounded-md px-2 py-1.5 text-zinc-400">Amort.</button>
<button data-mode="tco" class="mode-btn rounded-md px-2 py-1.5 text-zinc-400">TCO</button>
</div>
<label class="block">
<div class="flex justify-between"><span>Tokens / day</span><span id="tpd-val3" class="mono text-zinc-300"></span></div>
<input id="tpd3" type="range" min="3.5" max="10" step="0.05" value="6" class="w-full">
</label>
<label class="block">
<div class="flex justify-between"><span>Max VRAM</span><span id="vram-val" class="mono text-zinc-300"></span></div>
<input id="vram" type="range" min="8" max="200" step="1" value="200" class="w-full">
</label>
<label class="flex items-center gap-2"><input id="open-only" type="checkbox" checked class="accent-indigo-400">Open-weight only</label>
<label class="block"><input id="search" type="text" placeholder="search…" class="w-full ghost-input rounded-md px-2 py-1.5 mono"></label>
</section>
</aside>
<section class="col-span-9 space-y-4">
<div id="kpis" class="grid grid-cols-3 gap-4"></div>
<div class="rounded-xl bg-zinc-900/30 grad-border glow overflow-hidden">
<div class="px-4 py-3 border-b border-white/5 text-sm font-medium">Models · click row for providers</div>
<div id="model-table" class="divide-y divide-white/5"></div>
</div>
</section>
</section>
<!-- ============================================================ TUNE DRAWER (shared) -->
<details class="max-w-[1180px] mx-auto px-6 mt-2 mb-10">
<summary class="rounded-xl bg-zinc-900/30 grad-border glow px-5 py-3 text-sm flex items-center justify-between">
<span class="flex items-center gap-2"><span class="text-zinc-500">▸</span> Tune assumptions</span>
<span class="text-[11px] text-zinc-500">engine · quant · electricity · amortization · I:O ratio</span>
</summary>
<div class="rounded-xl bg-zinc-950/40 grad-border glow mt-2 p-5 grid grid-cols-1 md:grid-cols-3 gap-5 text-xs">
<div class="space-y-3">
<div class="text-[11px] uppercase tracking-wider text-zinc-500">Inference engine</div>
<select id="engine" class="w-full ghost-input rounded-md px-2 py-1.5 mono"></select>
<div id="engine-help" class="text-[11px] text-zinc-500 leading-snug"></div>
<div class="grid grid-cols-2 gap-1 mono">
<button data-conc="single" data-active="true" class="conc-btn rounded-md px-2 py-1 text-zinc-400">Single-stream</button>
<button data-conc="batched" class="conc-btn rounded-md px-2 py-1 text-zinc-400">Batched serving</button>
</div>
<label class="block">
<div class="flex justify-between"><span>Speculative decode</span><span id="spec-val" class="mono text-zinc-300"></span></div>
<input id="spec" type="range" min="1.0" max="2.5" step="0.05" value="1.0" class="w-full">
</label>
</div>
<div class="space-y-3">
<div class="text-[11px] uppercase tracking-wider text-zinc-500">Local stack</div>
<select id="quant" class="w-full ghost-input rounded-md px-2 py-1.5 mono"></select>
<div id="quant-help" class="text-[11px] text-zinc-500 leading-snug"></div>
<label class="block">
<div class="flex justify-between"><span>KV cache quant</span></div>
<div class="grid grid-cols-3 gap-1 mono mt-1">
<button data-kv="FP16" class="kv-btn rounded-md px-2 py-1 text-zinc-400">FP16</button>
<button data-kv="Q8" data-active="true" class="kv-btn rounded-md px-2 py-1 text-zinc-400">Q8</button>
<button data-kv="Q4" class="kv-btn rounded-md px-2 py-1 text-zinc-400">Q4</button>
</div>
</label>
<label class="block">
<div class="flex justify-between"><span>Min TPS target</span><span id="mintps-val" class="mono text-zinc-300"></span></div>
<input id="mintps" type="range" min="0" max="200" step="5" value="30" class="w-full">
</label>
<label class="block">
<div class="flex justify-between"><span>Input:Output ratio</span><span id="io-val" class="mono text-zinc-300"></span></div>
<input id="io" type="range" min="0" max="10" step="0.5" value="3" class="w-full">
<div class="text-[10px] text-zinc-600 mt-0.5">e.g. 3:1 = 3 input tokens for every 1 output token. Typical chat ≈ 3:1, agent loops ≈ 10:1+.</div>
</label>
<div class="grid grid-cols-2 gap-1 mono">
<button data-price="street" data-active="true" class="price-btn rounded-md px-2 py-1 text-zinc-400">Street</button>
<button data-price="msrp" class="price-btn rounded-md px-2 py-1 text-zinc-400">MSRP</button>
</div>
</div>
<div class="space-y-3">
<div class="text-[11px] uppercase tracking-wider text-zinc-500">Cost model</div>
<label class="block">
<div class="flex justify-between"><span>Amortize over</span><span id="years-val" class="mono text-zinc-300"></span></div>
<input id="years" type="range" min="1" max="7" step="0.5" value="3" class="w-full">
</label>
<label class="block">
<div class="flex justify-between"><span>GPU utilization</span><span id="util-val" class="mono text-zinc-300"></span></div>
<input id="util" type="range" min="0.05" max="1.0" step="0.05" value="0.4" class="w-full">
</label>
<label class="block">
<div class="flex justify-between"><span>Electricity $/kWh</span><span id="kwh-val" class="mono text-zinc-300"></span></div>
<input id="kwh" type="range" min="0.0" max="0.40" step="0.005" value="0.082" class="w-full">
<div class="text-[10px] text-zinc-600 mt-0.5">UAE ≈ 0.08 · US 0.16 · EU 0.30</div>
</label>
</div>
</div>
</details>
<footer class="max-w-[1180px] mx-auto px-6 pb-12 text-[11px] text-zinc-600 leading-relaxed">
Methodology: API pricing from <span class="mono">openrouter.ai/api/v1/models/{id}/endpoints</span>. Local TPS = anchor (110 t/s on Qwen3-9B Q4 @ RTX 5070 Ti) × bandwidth_ratio ÷ model_size_ratio × engine_multiplier × speculative. Memory-bound single-stream model, ±25% real-world. VRAM = params × bytes × 1.2 overhead. Verdict thresholds: BUY if payback &lt; 1y, MAYBE 1-3y, SKIP &gt; 3y. <strong>Point-in-time POC.</strong> Verify hardware prices before purchase.
</footer>
<script>
const DATA = [{"id": "ibm-granite/granite-4.1-8b", "name": "IBM: Granite 4.1 8B", "hf": "ibm-granite/granite-4.1-8b", "context": 131072, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "WandB", "tag": "wandb/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "WandB", "tag": "wandb/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free", "name": "NVIDIA: Nemotron 3 Nano Omni (free)", "hf": null, "context": 256000, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.48, "providers": [{"provider": "Nvidia", "tag": "nvidia", "context": 256000, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.81825212683681, "uptime_1d": 97.62081971969573}], "cheapest": {"provider": "Nvidia", "tag": "nvidia", "context": 256000, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.81825212683681, "uptime_1d": 97.62081971969573}}, {"id": "qwen/qwen3.6-35b-a3b", "name": "Qwen: Qwen3.6 35B A3B", "hf": "Qwen/Qwen3.6-35B-A3B", "context": 262144, "open_weight": true, "params_total_b": 35.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.14, "providers": [{"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.16119999999999998, "completion_per_mtok": 0.9652499999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94186046511628, "uptime_1d": 99.94077207826547}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.7656680490705}, {"provider": "AkashML", "tag": "akashml/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.22999999999999998, "completion_per_mtok": 1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92692224131461}], "cheapest": {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.16119999999999998, "completion_per_mtok": 0.9652499999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94186046511628, "uptime_1d": 99.94077207826547}}, {"id": "qwen/qwen3.6-27b", "name": "Qwen: Qwen3.6 27B", "hf": "Qwen/Qwen3.6-27B", "context": 262144, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "Chutes", "tag": "chutes/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.5, "completion_per_mtok": 2.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.48717948717949, "uptime_1d": 98.82017631073109}, {"provider": "Morph", "tag": "morph", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.55, "completion_per_mtok": 2.4, "throughput_tps": null, "latency_ms": null, "uptime_30m": 44.611973392461195, "uptime_1d": 95.61675882603147}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 81920, "prompt_per_mtok": 0.32, "completion_per_mtok": 3.1999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95503597122301, "uptime_1d": 98.80547562995902}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.325, "completion_per_mtok": 3.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 96.79561793906196}, {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.6, "completion_per_mtok": 3.5999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.54010631308606}], "cheapest": {"provider": "Chutes", "tag": "chutes/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.5, "completion_per_mtok": 2.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.48717948717949, "uptime_1d": 98.82017631073109}}, {"id": "google/gemma-4-26b-a4b-it:free", "name": "Google: Gemma 4 26B A4B (free)", "hf": "google/gemma-4-26B-A4B-it", "context": 262144, "open_weight": true, "params_total_b": 26.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.10400000000000001, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 262144, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.87583083777665}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 262144, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.87583083777665}}, {"id": "google/gemma-4-26b-a4b-it", "name": "Google: Gemma 4 26B A4B ", "hf": "google/gemma-4-26B-A4B-it", "context": 262144, "open_weight": true, "params_total_b": 26.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.10400000000000001, "providers": [{"provider": "DekaLLM", "tag": "dekallm/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.33, "throughput_tps": null, "latency_ms": null, "uptime_30m": 87.92884371029224, "uptime_1d": 67.14721482834327}, {"provider": "Cloudflare", "tag": "cloudflare", "context": 256000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94018244354717, "uptime_1d": 99.87809761613116}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.33999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.7280629340067, "uptime_1d": 99.7397940910503}, {"provider": "Ionstream", "tag": "ionstream/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.35, "throughput_tps": null, "latency_ms": null, "uptime_30m": 63.57952325127003, "uptime_1d": 49.62655135620823}, {"provider": "Parasail", "tag": "parasail/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.82595431466203}, {"provider": "Novita", "tag": "novita/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.48339483394834, "uptime_1d": 99.81930536943634}, {"provider": "NextBit", "tag": "nextbit/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.12445364619278, "uptime_1d": 99.00234285917048}, {"provider": "Io Net", "tag": "io-net/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.66058549002969, "uptime_1d": 97.35400038703527}, {"provider": "Venice", "tag": "venice/bf16", "context": 256000, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.1625, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.35728447055057}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.76572142801344, "uptime_1d": 99.76022617471281}], "cheapest": {"provider": "DekaLLM", "tag": "dekallm/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.33, "throughput_tps": null, "latency_ms": null, "uptime_30m": 87.92884371029224, "uptime_1d": 67.14721482834327}}, {"id": "google/gemma-4-31b-it:free", "name": "Google: Gemma 4 31B (free)", "hf": "google/gemma-4-31B-it", "context": 262144, "open_weight": true, "params_total_b": 31.0, "params_active_b": 31.0, "kv_gb_per_1k": 0.124, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 262144, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.91177767975297}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 262144, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.91177767975297}}, {"id": "google/gemma-4-31b-it", "name": "Google: Gemma 4 31B", "hf": "google/gemma-4-31B-it", "context": 262144, "open_weight": true, "params_total_b": 31.0, "params_active_b": 31.0, "kv_gb_per_1k": 0.124, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.38, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.7056614497696, "uptime_1d": 99.53112885595147}, {"provider": "Chutes", "tag": "chutes/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 65536, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.38, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.65674782085964, "uptime_1d": 96.84352083731063}, {"provider": "Novita", "tag": "novita/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.55222665472604, "uptime_1d": 98.23305193498936}, {"provider": "Parasail", "tag": "parasail/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.8361738024794, "uptime_1d": 99.55852663839724}, {"provider": "Venice", "tag": "venice/bf16", "context": 256000, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.175, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96374184191443, "uptime_1d": 98.14838462520751}, {"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.60205992509363, "uptime_1d": 99.05116165942282}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.38, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.7056614497696, "uptime_1d": 99.53112885595147}}, {"id": "nvidia/nemotron-3-super-120b-a12b:free", "name": "NVIDIA: Nemotron 3 Super (free)", "hf": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8", "context": 262144, "open_weight": true, "params_total_b": 120.0, "params_active_b": 12.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "Nvidia", "tag": "nvidia", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.13816189896531, "uptime_1d": 98.56685729155443}], "cheapest": {"provider": "Nvidia", "tag": "nvidia", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.13816189896531, "uptime_1d": 98.56685729155443}}, {"id": "nvidia/nemotron-3-super-120b-a12b", "name": "NVIDIA: Nemotron 3 Super", "hf": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8", "context": 262144, "open_weight": true, "params_total_b": 120.0, "params_active_b": 12.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "DekaLLM", "tag": "dekallm/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.16168271667512, "uptime_1d": 78.29774218663108}, {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.21918572225321, "uptime_1d": 99.77634402608622}, {"provider": "Nebius", "tag": "nebius/fp4", "context": 262144, "quantization": "fp4", "max_completion_tokens": null, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.8999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.10126582278481, "uptime_1d": 98.45385347288297}], "cheapest": {"provider": "DekaLLM", "tag": "dekallm/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.16168271667512, "uptime_1d": 78.29774218663108}}, {"id": "qwen/qwen3.5-9b", "name": "Qwen: Qwen3.5-9B", "hf": "Qwen/Qwen3.5-9B", "context": 262144, "open_weight": true, "params_total_b": 9.0, "params_active_b": 9.0, "kv_gb_per_1k": 0.036000000000000004, "providers": [{"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 92.4863387978142, "uptime_1d": 96.8969518580364}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.85093557704383}], "cheapest": {"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 92.4863387978142, "uptime_1d": 96.8969518580364}}, {"id": "qwen/qwen3.5-35b-a3b", "name": "Qwen: Qwen3.5-35B-A3B", "hf": "Qwen/Qwen3.5-35B-A3B", "context": 262144, "open_weight": true, "params_total_b": 35.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.14, "providers": [{"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.97316876844647, "uptime_1d": 99.97374382207578}, {"provider": "DekaLLM", "tag": "dekallm/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.19, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 76.9607843137255}, {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.1625, "completion_per_mtok": 1.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 89.35596765758879}, {"provider": "Venice", "tag": "venice", "context": 256000, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3125, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.18963845407951}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.22499999999999998, "completion_per_mtok": 1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.70370370370371, "uptime_1d": 99.62541116977069}, {"provider": "AkashML", "tag": "akashml/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.22999999999999998, "completion_per_mtok": 1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.96597125880167}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 90.98641993732278}], "cheapest": {"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.97316876844647, "uptime_1d": 99.97374382207578}}, {"id": "qwen/qwen3.5-27b", "name": "Qwen: Qwen3.5-27B", "hf": "Qwen/Qwen3.5-27B", "context": 262144, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.195, "completion_per_mtok": 1.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.55936413313464, "uptime_1d": 94.58952366365446}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.27, "completion_per_mtok": 2.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.51768488745981, "uptime_1d": 98.97541385052405}, {"provider": "Phala", "tag": "phala", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.3, "completion_per_mtok": 2.4, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 85.5402042392517}, {"provider": "Novita", "tag": "novita/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, "completion_per_mtok": 2.4, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.8542539624704, "uptime_1d": 99.75450996412069}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.195, "completion_per_mtok": 1.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.55936413313464, "uptime_1d": 94.58952366365446}}, {"id": "qwen/qwen3.5-122b-a10b", "name": "Qwen: Qwen3.5-122B-A10B", "hf": "Qwen/Qwen3.5-122B-A10B", "context": 262144, "open_weight": true, "params_total_b": 122.0, "params_active_b": 10.0, "kv_gb_per_1k": 0.488, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.26, "completion_per_mtok": 2.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86768111147867, "uptime_1d": 99.91762490753815}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, "completion_per_mtok": 2.4, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.80376124284545}, {"provider": "Novita", "tag": "novita/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 65536, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 3.1999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92791139655284}, {"provider": "Venice", "tag": "venice", "context": 128000, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.5, "completion_per_mtok": 4.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.84482758620689, "uptime_1d": 96.0263616980035}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.26, "completion_per_mtok": 2.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86768111147867, "uptime_1d": 99.91762490753815}}, {"id": "liquid/lfm-2-24b-a2b", "name": "LiquidAI: LFM2-24B-A2B", "hf": "LiquidAI/LFM2-24B-A2B", "context": 32768, "open_weight": true, "params_total_b": 24.0, "params_active_b": 2.0, "kv_gb_per_1k": 0.384, "providers": [{"provider": "Together", "tag": "together", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.12, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Together", "tag": "together", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.12, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3.5-397b-a17b", "name": "Qwen: Qwen3.5 397B A17B", "hf": "Qwen/Qwen3.5-397B-A17B", "context": 262144, "open_weight": true, "params_total_b": 397.0, "params_active_b": 17.0, "kv_gb_per_1k": 1.588, "providers": [{"provider": "Chutes", "tag": "chutes/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.39, "completion_per_mtok": 2.34, "throughput_tps": null, "latency_ms": null, "uptime_30m": 71.70418006430869, "uptime_1d": 81.89601276855522}, {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.39, "completion_per_mtok": 2.34, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95128531814434, "uptime_1d": 99.35039110484392}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.55, "completion_per_mtok": 3.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.6135169953424, "uptime_1d": 99.15288328223555}, {"provider": "Morph", "tag": "morph", "context": 262144, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.55, "completion_per_mtok": 3.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.76261803972646, "uptime_1d": 87.50405550425516}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.5, "completion_per_mtok": 3.5999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96552912788694, "uptime_1d": 99.95901333929503}, {"provider": "Novita", "tag": "novita", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.6, "completion_per_mtok": 3.5999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.19583727530747, "uptime_1d": 99.38172203335918}, {"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.6, "completion_per_mtok": 3.5999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 85.56473829201103, "uptime_1d": 89.45778239823268}, {"provider": "Venice", "tag": "venice", "context": 128000, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.75, "completion_per_mtok": 4.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.42857142857143, "uptime_1d": 93.84131493506493}], "cheapest": {"provider": "Chutes", "tag": "chutes/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.39, "completion_per_mtok": 2.34, "throughput_tps": null, "latency_ms": null, "uptime_30m": 71.70418006430869, "uptime_1d": 81.89601276855522}}, {"id": "liquid/lfm-2.5-1.2b-thinking:free", "name": "LiquidAI: LFM2.5-1.2B-Thinking (free)", "hf": "LiquidAI/LFM2.5-1.2B-Thinking", "context": 32768, "open_weight": true, "params_total_b": 1.2, "params_active_b": 1.2, "kv_gb_per_1k": 0.0192, "providers": [{"provider": "Liquid", "tag": "liquid", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 92.04306800366788}], "cheapest": {"provider": "Liquid", "tag": "liquid", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 92.04306800366788}}, {"id": "liquid/lfm-2.5-1.2b-instruct:free", "name": "LiquidAI: LFM2.5-1.2B-Instruct (free)", "hf": "LiquidAI/LFM2.5-1.2B-Instruct", "context": 32768, "open_weight": true, "params_total_b": 1.2, "params_active_b": 1.2, "kv_gb_per_1k": 0.0192, "providers": [{"provider": "Liquid", "tag": "liquid", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 93.35464044253227}], "cheapest": {"provider": "Liquid", "tag": "liquid", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 93.35464044253227}}, {"id": "allenai/olmo-3.1-32b-instruct", "name": "AllenAI: Olmo 3.1 32B Instruct", "hf": "allenai/Olmo-3.1-32B-Instruct", "context": 65536, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.512, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 65536, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 65536, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "nvidia/nemotron-3-nano-30b-a3b:free", "name": "NVIDIA: Nemotron 3 Nano 30B A3B (free)", "hf": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "context": 256000, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.48, "providers": [{"provider": "Nvidia", "tag": "nvidia/bf16", "context": 256000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98661597618367}], "cheapest": {"provider": "Nvidia", "tag": "nvidia/bf16", "context": 256000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98661597618367}}, {"id": "nvidia/nemotron-3-nano-30b-a3b", "name": "NVIDIA: Nemotron 3 Nano 30B A3B", "hf": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "context": 262144, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.48, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp4", "context": 262144, "quantization": "fp4", "max_completion_tokens": 228000, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp4", "context": 262144, "quantization": "fp4", "max_completion_tokens": 228000, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/ministral-14b-2512", "name": "Mistral: Ministral 3 14B 2512", "hf": "mistralai/Ministral-3-14B-Instruct-2512", "context": 262144, "open_weight": true, "params_total_b": 14.0, "params_active_b": 14.0, "kv_gb_per_1k": 0.056, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.8841642228739, "uptime_1d": 99.11579237942428}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.35, "throughput_tps": null, "latency_ms": null, "uptime_30m": 88.1578947368421, "uptime_1d": 97.52548071871388}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.8841642228739, "uptime_1d": 99.11579237942428}}, {"id": "mistralai/ministral-8b-2512", "name": "Mistral: Ministral 3 8B 2512", "hf": "mistralai/Ministral-3-8B-Instruct-2512", "context": 262144, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 88.03027285401315, "uptime_1d": 98.71209967718057}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 87.10968775020017, "uptime_1d": 95.64102267869727}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 88.03027285401315, "uptime_1d": 98.71209967718057}}, {"id": "mistralai/ministral-3b-2512", "name": "Mistral: Ministral 3 3B 2512", "hf": "mistralai/Ministral-3-3B-Instruct-2512", "context": 131072, "open_weight": true, "params_total_b": 3.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.012, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.73936671255706, "uptime_1d": 99.81908279573487}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.66287399915718, "uptime_1d": 99.6376727261442}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.73936671255706, "uptime_1d": 99.81908279573487}}, {"id": "allenai/olmo-3-32b-think", "name": "AllenAI: Olmo 3 32B Think", "hf": "allenai/Olmo-3-32B-Think", "context": 65536, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.512, "providers": [], "cheapest": null}, {"id": "deepcogito/cogito-v2.1-671b", "name": "Deep Cogito: Cogito v2.1 671B", "hf": "", "context": 128000, "open_weight": false, "params_total_b": 671.0, "params_active_b": 671.0, "kv_gb_per_1k": 10.736, "providers": [{"provider": "Together", "tag": "together", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 1.25, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Together", "tag": "together", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 1.25, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/voxtral-small-24b-2507", "name": "Mistral: Voxtral Small 24B 2507", "hf": "mistralai/Voxtral-Small-24B-2507", "context": 32000, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 32000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.49579831932773, "uptime_1d": 99.83273090825195}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 32000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.49579831932773, "uptime_1d": 99.83273090825195}}, {"id": "openai/gpt-oss-safeguard-20b", "name": "OpenAI: gpt-oss-safeguard-20b", "hf": "openai/gpt-oss-safeguard-20b", "context": 131072, "open_weight": true, "params_total_b": 20.0, "params_active_b": 20.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "nvidia/nemotron-nano-12b-v2-vl:free", "name": "NVIDIA: Nemotron Nano 12B 2 VL (free)", "hf": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", "context": 128000, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "Nvidia", "tag": "nvidia", "context": 128000, "quantization": "unknown", "max_completion_tokens": 128000, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94242947610823, "uptime_1d": 93.32141994687274}], "cheapest": {"provider": "Nvidia", "tag": "nvidia", "context": 128000, "quantization": "unknown", "max_completion_tokens": 128000, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94242947610823, "uptime_1d": 93.32141994687274}}, {"id": "nvidia/nemotron-nano-12b-v2-vl", "name": "NVIDIA: Nemotron Nano 12B 2 VL", "hf": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", "context": 131072, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "qwen/qwen3-vl-32b-instruct", "name": "Qwen: Qwen3 VL 32B Instruct", "hf": "Qwen/Qwen3-VL-32B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.10400000000000001, "completion_per_mtok": 0.41600000000000004, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.10400000000000001, "completion_per_mtok": 0.41600000000000004, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-vl-8b-thinking", "name": "Qwen: Qwen3 VL 8B Thinking", "hf": "Qwen/Qwen3-VL-8B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.117, "completion_per_mtok": 1.365, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.117, "completion_per_mtok": 1.365, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "qwen/qwen3-vl-8b-instruct", "name": "Qwen: Qwen3 VL 8B Instruct", "hf": "Qwen/Qwen3-VL-8B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.117, "completion_per_mtok": 0.45499999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99264920580795}, {"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.77595103804153, "uptime_1d": 98.67297004497782}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 32000, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.62881896944823, "uptime_1d": 98.54493264902968}, {"provider": "Parasail", "tag": "parasail/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.25, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 57.03141038801067, "uptime_1d": 93.08841169202464}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.117, "completion_per_mtok": 0.45499999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99264920580795}}, {"id": "nvidia/llama-3.3-nemotron-super-49b-v1.5", "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", "hf": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "context": 131072, "open_weight": true, "params_total_b": 49.0, "params_active_b": 49.0, "kv_gb_per_1k": 0.196, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98701298701299}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98701298701299}}, {"id": "baidu/ernie-4.5-21b-a3b-thinking", "name": "Baidu: ERNIE 4.5 21B A3B Thinking", "hf": "baidu/ERNIE-4.5-21B-A3B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 21.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.336, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "qwen/qwen3-vl-30b-a3b-thinking", "name": "Qwen: Qwen3 VL 30B A3B Thinking", "hf": "Qwen/Qwen3-VL-30B-A3B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "Novita", "tag": "novita/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.5791518290709}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.29, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 82.30994152046783}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 1.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.5791518290709}}, {"id": "qwen/qwen3-vl-30b-a3b-instruct", "name": "Qwen: Qwen3 VL 30B A3B Instruct", "hf": "Qwen/Qwen3-VL-30B-A3B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.52, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98262682418346, "uptime_1d": 99.98423816759241}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 32000, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 81.08558108558108, "uptime_1d": 91.8072598504381}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.9204665959703, "uptime_1d": 99.45929958500088}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.7, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.23119165293795, "uptime_1d": 94.2152584953061}, {"provider": "Phala", "tag": "phala", "context": 128000, "quantization": "unknown", "max_completion_tokens": 128000, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.7, "throughput_tps": null, "latency_ms": null, "uptime_30m": 90.73251942286349, "uptime_1d": 97.9021590789796}, {"provider": "Venice", "tag": "venice", "context": 128000, "quantization": "unknown", "max_completion_tokens": 4096, "prompt_per_mtok": 0.25, "completion_per_mtok": 0.8999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 92.08633093525181, "uptime_1d": 94.28436871173524}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.29, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 93.22766570605188, "uptime_1d": 92.87602114368092}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.52, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98262682418346, "uptime_1d": 99.98423816759241}}, {"id": "thedrummer/cydonia-24b-v4.1", "name": "TheDrummer: Cydonia 24B V4.1", "hf": "thedrummer/cydonia-24b-v4.1", "context": 131072, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.384, "providers": [{"provider": "Parasail", "tag": "parasail/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Parasail", "tag": "parasail/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-vl-235b-a22b-thinking", "name": "Qwen: Qwen3 VL 235B A22B Thinking", "hf": "Qwen/Qwen3-VL-235B-A22B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.26, "completion_per_mtok": 2.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86282578875172, "uptime_1d": 99.6266156055529}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.98, "completion_per_mtok": 3.95, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98378071527046}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.26, "completion_per_mtok": 2.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86282578875172, "uptime_1d": 99.6266156055529}}, {"id": "qwen/qwen3-vl-235b-a22b-instruct", "name": "Qwen: Qwen3 VL 235B A22B Instruct", "hf": "Qwen/Qwen3-VL-235B-A22B-Instruct", "context": 262144, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 25.523560209424083, "uptime_1d": 82.78650453427547}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.26, "completion_per_mtok": 1.04, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95923359152059, "uptime_1d": 99.7187006009738}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.25, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.59475218658892, "uptime_1d": 93.93047112462007}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 87.46155996645233, "uptime_1d": 87.45459364233041}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 74.50090744101632, "uptime_1d": 85.02783074121258}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.21, "completion_per_mtok": 1.9, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.25023488881929, "uptime_1d": 97.22329817101271}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 25.523560209424083, "uptime_1d": 82.78650453427547}}, {"id": "alibaba/tongyi-deepresearch-30b-a3b", "name": "Tongyi DeepResearch 30B A3B", "hf": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", "context": 131072, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.48, "providers": [{"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-next-80b-a3b-thinking", "name": "Qwen: Qwen3 Next 80B A3B Thinking", "hf": "Qwen/Qwen3-Next-80B-A3B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 80.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0975, "completion_per_mtok": 0.78, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.42528735632183}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.17355371900827}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.89270386266095}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.77011494252874}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0975, "completion_per_mtok": 0.78, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.42528735632183}}, {"id": "qwen/qwen3-next-80b-a3b-instruct:free", "name": "Qwen: Qwen3 Next 80B A3B Instruct (free)", "hf": "Qwen/Qwen3-Next-80B-A3B-Instruct", "context": 262144, "open_weight": true, "params_total_b": 80.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "Venice", "tag": "venice/beta", "context": 262144, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 63.48641655886158}], "cheapest": {"provider": "Venice", "tag": "venice/beta", "context": 262144, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 63.48641655886158}}, {"id": "qwen/qwen3-next-80b-a3b-instruct", "name": "Qwen: Qwen3 Next 80B A3B Instruct", "hf": "Qwen/Qwen3-Next-80B-A3B-Instruct", "context": 262144, "open_weight": true, "params_total_b": 80.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0975, "completion_per_mtok": 0.78, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96499008052282, "uptime_1d": 99.38025615176959}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09, "completion_per_mtok": 1.1, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92704449749216}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 1.1, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98475609756098, "uptime_1d": 99.87787358738768}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98994344279677}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.96375902539656}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.677245831092, "uptime_1d": 95.45495053916763}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0975, "completion_per_mtok": 0.78, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96499008052282, "uptime_1d": 99.38025615176959}}, {"id": "nvidia/nemotron-nano-9b-v2:free", "name": "NVIDIA: Nemotron Nano 9B V2 (free)", "hf": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "context": 128000, "open_weight": true, "params_total_b": 9.0, "params_active_b": 9.0, "kv_gb_per_1k": 0.14400000000000002, "providers": [{"provider": "Nvidia", "tag": "nvidia/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.93698802772528, "uptime_1d": 98.51666154715075}], "cheapest": {"provider": "Nvidia", "tag": "nvidia/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.93698802772528, "uptime_1d": 98.51666154715075}}, {"id": "nvidia/nemotron-nano-9b-v2", "name": "NVIDIA: Nemotron Nano 9B V2", "hf": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "context": 131072, "open_weight": true, "params_total_b": 9.0, "params_active_b": 9.0, "kv_gb_per_1k": 0.14400000000000002, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-30b-a3b-thinking-2507", "name": "Qwen: Qwen3 30B A3B Thinking 2507", "hf": "Qwen/Qwen3-30B-A3B-Thinking-2507", "context": 131072, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 98.68203691733103}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.97065823362567}, {"provider": "Alibaba", "tag": "alibaba", "context": 81920, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 1.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 98.68203691733103}}, {"id": "nousresearch/hermes-4-70b", "name": "Nous: Hermes 4 70B", "hf": "NousResearch/Hermes-4-70B", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "nousresearch/hermes-4-405b", "name": "Nous: Hermes 4 405B", "hf": "NousResearch/Hermes-4-405B", "context": 131072, "open_weight": true, "params_total_b": 405.0, "params_active_b": 405.0, "kv_gb_per_1k": 6.48, "providers": [{"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 1.0, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 1.0, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "baidu/ernie-4.5-21b-a3b", "name": "Baidu: ERNIE 4.5 21B A3B", "hf": "baidu/ERNIE-4.5-21B-A3B-PT", "context": 120000, "open_weight": true, "params_total_b": 21.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.336, "providers": [{"provider": "Novita", "tag": "novita/bf16", "context": 120000, "quantization": "bf16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], "cheapest": {"provider": "Novita", "tag": "novita/bf16", "context": 120000, "quantization": "bf16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "baidu/ernie-4.5-vl-28b-a3b", "name": "Baidu: ERNIE 4.5 VL 28B A3B", "hf": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "context": 30000, "open_weight": true, "params_total_b": 28.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.448, "providers": [{"provider": "Novita", "tag": "novita/fp16", "context": 30000, "quantization": "fp16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], "cheapest": {"provider": "Novita", "tag": "novita/fp16", "context": 30000, "quantization": "fp16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "openai/gpt-oss-120b:free", "name": "OpenAI: gpt-oss-120b (free)", "hf": "openai/gpt-oss-120b", "context": 131072, "open_weight": true, "params_total_b": 120.0, "params_active_b": 120.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "OpenInference", "tag": "open-inference/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.87826034026234, "uptime_1d": 99.3890139848677}], "cheapest": {"provider": "OpenInference", "tag": "open-inference/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.87826034026234, "uptime_1d": 99.3890139848677}}, {"id": "openai/gpt-oss-120b", "name": "OpenAI: gpt-oss-120b", "hf": "openai/gpt-oss-120b", "context": 131072, "open_weight": true, "params_total_b": 120.0, "params_active_b": 120.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "DekaLLM", "tag": "dekallm/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.039, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.5614476485388, "uptime_1d": 47.60379344547681}, {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.039, "completion_per_mtok": 0.19, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99256422649367, "uptime_1d": 99.51077552678176}, {"provider": "Novita", "tag": "novita/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 32768, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99120492524186, "uptime_1d": 98.2916975651959}, {"provider": "Google", "tag": "google-vertex", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.36, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.74634671588987, "uptime_1d": 99.00415432626369}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.07390300230946, "uptime_1d": 92.51337360538574}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.94251536345345}, {"provider": "Phala", "tag": "phala", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.49, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.47937525030036, "uptime_1d": 96.72812408545508}, {"provider": "BaseTen", "tag": "baseten/fp4", "context": 128072, "quantization": "fp4", "max_completion_tokens": 128072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99271366526058}, {"provider": "Io Net", "tag": "io-net/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.175, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.41656942823805, "uptime_1d": 98.77918419980996}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.92728594800944, "uptime_1d": 99.95947194326072}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86095661846495, "uptime_1d": 99.89755847703603}, {"provider": "Together", "tag": "together", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95662546085448, "uptime_1d": 99.07211291024981}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.97408181000442, "uptime_1d": 99.97658850013684}, {"provider": "Fireworks", "tag": "fireworks", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.99063983419136}, {"provider": "WandB", "tag": "wandb/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 131072, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96481763809078, "uptime_1d": 99.98796143807846}, {"provider": "Nebius", "tag": "nebius/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.91860654403386, "uptime_1d": 99.94320819112627}, {"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.9613501674826}, {"provider": "Parasail", "tag": "parasail/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98392412185517, "uptime_1d": 99.87735951899775}, {"provider": "SambaNova", "tag": "sambanova", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.95, "throughput_tps": null, "latency_ms": null, "uptime_30m": 92.77728743748162, "uptime_1d": 96.00370780246493}, {"provider": "Cerebras", "tag": "cerebras/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 40960, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.92981361615847, "uptime_1d": 99.96287358455541}], "cheapest": {"provider": "DekaLLM", "tag": "dekallm/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.039, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.5614476485388, "uptime_1d": 47.60379344547681}}, {"id": "openai/gpt-oss-20b:free", "name": "OpenAI: gpt-oss-20b (free)", "hf": "openai/gpt-oss-20b", "context": 131072, "open_weight": true, "params_total_b": 20.0, "params_active_b": 20.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "OpenInference", "tag": "open-inference/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.9381857518158, "uptime_1d": 96.28670545652344}], "cheapest": {"provider": "OpenInference", "tag": "open-inference/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.9381857518158, "uptime_1d": 96.28670545652344}}, {"id": "openai/gpt-oss-20b", "name": "OpenAI: gpt-oss-20b", "hf": "openai/gpt-oss-20b", "context": 131072, "open_weight": true, "params_total_b": 20.0, "params_active_b": 20.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.14, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.02940690566662, "uptime_1d": 99.93134805131857}, {"provider": "Novita", "tag": "novita/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 32768, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.18494200043773, "uptime_1d": 96.24104528568287}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.15384615384616, "uptime_1d": 83.29677683185248}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.4768962510898, "uptime_1d": 81.09778413627895}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.88151658767772, "uptime_1d": 99.8191531951903}, {"provider": "Parasail", "tag": "parasail/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 131072, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95689895594059}, {"provider": "Together", "tag": "together", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.92732558139535, "uptime_1d": 99.61435775734203}, {"provider": "WandB", "tag": "wandb/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 131072, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99493978342274, "uptime_1d": 99.98588994154404}, {"provider": "Google", "tag": "google-vertex", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99505293361037}, {"provider": "Fireworks", "tag": "fireworks", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.86135693215338, "uptime_1d": 99.01289679174496}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.71052948980824, "uptime_1d": 99.71441514793479}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.42693409742121, "uptime_1d": 99.73719558488582}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.14, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.02940690566662, "uptime_1d": 99.93134805131857}}, {"id": "qwen/qwen3-coder-30b-a3b-instruct", "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "hf": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "context": 160000, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 160000, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.27, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.72602739726028, "uptime_1d": 99.78641069454216}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": 89.8876404494382, "uptime_1d": 94.39538787126907}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 0, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}, {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.2925, "completion_per_mtok": 1.4625, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.8025666337611}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 160000, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.27, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.72602739726028, "uptime_1d": 99.78641069454216}}, {"id": "qwen/qwen3-30b-a3b-instruct-2507", "name": "Qwen: Qwen3 30B A3B Instruct 2507", "hf": "Qwen/Qwen3-30B-A3B-Instruct-2507", "context": 262144, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 72.49427917620137, "uptime_1d": 80.1509162773985}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96983135728723, "uptime_1d": 98.53410967656384}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.70758784754919, "uptime_1d": 99.72369259153507}, {"provider": "WandB", "tag": "wandb/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99127678753162, "uptime_1d": 99.95009143553794}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.52, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.91720483523763, "uptime_1d": 99.78613389642383}, {"provider": "Venice", "tag": "venice", "context": 256000, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.19, "completion_per_mtok": 0.69, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.68926553672316, "uptime_1d": 94.74492683321206}], "cheapest": {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 72.49427917620137, "uptime_1d": 80.1509162773985}}, {"id": "qwen/qwen3-235b-a22b-thinking-2507", "name": "Qwen: Qwen3 235B A22B Thinking 2507", "hf": "Qwen/Qwen3-235B-A22B-Thinking-2507", "context": 131072, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.14950000000000002, "completion_per_mtok": 1.495, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.96027692656907}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.22999999999999998, "completion_per_mtok": 2.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.83748645720478}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.28, "completion_per_mtok": 2.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.79536152796726}, {"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.3, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.50093574547722}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.14950000000000002, "completion_per_mtok": 1.495, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.96027692656907}}, {"id": "z-ai/glm-4-32b", "name": "Z.ai: GLM 4 32B ", "hf": "", "context": 128000, "open_weight": false, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.512, "providers": [{"provider": "Z.AI", "tag": "z-ai", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Z.AI", "tag": "z-ai", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-coder:free", "name": "Qwen: Qwen3 Coder 480B A35B (free)", "hf": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "context": 262000, "open_weight": true, "params_total_b": 480.0, "params_active_b": 35.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "Venice", "tag": "venice/beta", "context": 262000, "quantization": "fp8", "max_completion_tokens": 262000, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 48.627234199373504}], "cheapest": {"provider": "Venice", "tag": "venice/beta", "context": 262000, "quantization": "fp8", "max_completion_tokens": 262000, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 48.627234199373504}}, {"id": "qwen/qwen3-coder", "name": "Qwen: Qwen3 Coder 480B A35B", "hf": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "context": 262144, "open_weight": true, "params_total_b": 480.0, "params_active_b": 35.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 262144, "quantization": "fp4", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 77.96143250688705, "uptime_1d": 98.13233599134382}, {"provider": "Novita", "tag": "novita/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.52681388012618, "uptime_1d": 99.58604376108812}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.35, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 40.76923076923077, "uptime_1d": 92.72514315460697}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.22, "completion_per_mtok": 1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.9010989010989, "uptime_1d": 99.77141257697332}, {"provider": "WandB", "tag": "wandb/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 1.0, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 97.53236245954693}, {"provider": "Together", "tag": "together/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 2.0, "completion_per_mtok": 2.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.14563106796116, "uptime_1d": 97.22355193872666}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.78, "completion_per_mtok": 3.8, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.57645065650148}, {"provider": "Alibaba", "tag": "alibaba/opensource", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.975, "completion_per_mtok": 4.875, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.75429975429975, "uptime_1d": 99.56225680933852}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 262144, "quantization": "fp4", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 77.96143250688705, "uptime_1d": 98.13233599134382}}, {"id": "bytedance/ui-tars-1.5-7b", "name": "ByteDance: UI-TARS 7B ", "hf": "ByteDance-Seed/UI-TARS-1.5-7B", "context": 128000, "open_weight": true, "params_total_b": 7.0, "params_active_b": 7.0, "kv_gb_per_1k": 0.112, "providers": [{"provider": "Parasail", "tag": "parasail/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": 2048, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Parasail", "tag": "parasail/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": 2048, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-235b-a22b-2507", "name": "Qwen: Qwen3 235B A22B Instruct 2507", "hf": "Qwen/Qwen3-235B-A22B-Instruct-2507", "context": 262144, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.071, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.6719810227837, "uptime_1d": 97.27675475972455}, {"provider": "WandB", "tag": "wandb/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 93.70591114566238, "uptime_1d": 89.619085734929}, {"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.58, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.78885946847113, "uptime_1d": 98.93488888843753}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 81.20282736312447, "uptime_1d": 84.51794213955372}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.79015843038506, "uptime_1d": 99.34773523644196}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.14950000000000002, "completion_per_mtok": 0.5980000000000001, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.9717673630717, "uptime_1d": 98.53499176001077}, {"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 50.67750677506775, "uptime_1d": 83.7552917738856}, {"provider": "Friendli", "tag": "friendli", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.581589958159, "uptime_1d": 98.87134714557135}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.82164090368609, "uptime_1d": 99.41333333333333}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 16384, "prompt_per_mtok": 0.22, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.9446279571535}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 16384, "prompt_per_mtok": 0.25, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.93549820470004}, {"provider": "Cerebras", "tag": "cerebras/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 40960, "prompt_per_mtok": 0.6, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.73564308706673}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.071, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.6719810227837, "uptime_1d": 97.27675475972455}}, {"id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free", "name": "Venice: Uncensored (free)", "hf": "cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition", "context": 32768, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "Venice", "tag": "venice/fp16", "context": 32768, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 56.587537091988125}], "cheapest": {"provider": "Venice", "tag": "venice/fp16", "context": 32768, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 56.587537091988125}}, {"id": "google/gemma-3n-e2b-it:free", "name": "Google: Gemma 3n 2B (free)", "hf": "google/gemma-3n-E2B-it", "context": 8192, "open_weight": true, "params_total_b": 2.0, "params_active_b": 2.0, "kv_gb_per_1k": 0.008, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 8192, "quantization": "unknown", "max_completion_tokens": 2048, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95373048004626}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 8192, "quantization": "unknown", "max_completion_tokens": 2048, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95373048004626}}, {"id": "baidu/ernie-4.5-vl-424b-a47b", "name": "Baidu: ERNIE 4.5 VL 424B A47B ", "hf": "baidu/ERNIE-4.5-VL-424B-A47B-PT", "context": 123000, "open_weight": true, "params_total_b": 424.0, "params_active_b": 47.0, "kv_gb_per_1k": 6.784, "providers": [{"provider": "Novita", "tag": "novita/fp16", "context": 123000, "quantization": "fp16", "max_completion_tokens": 16000, "prompt_per_mtok": 0.42, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp16", "context": 123000, "quantization": "fp16", "max_completion_tokens": 16000, "prompt_per_mtok": 0.42, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "baidu/ernie-4.5-300b-a47b", "name": "Baidu: ERNIE 4.5 300B A47B ", "hf": "baidu/ERNIE-4.5-300B-A47B-PT", "context": 123000, "open_weight": true, "params_total_b": 300.0, "params_active_b": 47.0, "kv_gb_per_1k": 4.8, "providers": [{"provider": "Novita", "tag": "novita/bf16", "context": 123000, "quantization": "bf16", "max_completion_tokens": 12000, "prompt_per_mtok": 0.28, "completion_per_mtok": 1.1, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/bf16", "context": 123000, "quantization": "bf16", "max_completion_tokens": 12000, "prompt_per_mtok": 0.28, "completion_per_mtok": 1.1, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/mistral-small-3.2-24b-instruct", "name": "Mistral: Mistral Small 3.2 24B", "hf": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "context": 128000, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.59782070331848, "uptime_1d": 98.30798143249059}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09375, "completion_per_mtok": 0.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.6415163614668}, {"provider": "Mistral", "tag": "mistral", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 75.37328490718322, "uptime_1d": 99.06654325303747}, {"provider": "Parasail", "tag": "parasail/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95473064735174, "uptime_1d": 99.83462221144899}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.59782070331848, "uptime_1d": 98.30798143249059}}, {"id": "google/gemma-3n-e4b-it:free", "name": "Google: Gemma 3n 4B (free)", "hf": "google/gemma-3n-E4B-it", "context": 8192, "open_weight": true, "params_total_b": 4.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.016, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 8192, "quantization": "unknown", "max_completion_tokens": 2048, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92182410423453}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 8192, "quantization": "unknown", "max_completion_tokens": 2048, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92182410423453}}, {"id": "google/gemma-3n-e4b-it", "name": "Google: Gemma 3n 4B", "hf": "google/gemma-3n-E4B-it", "context": 32768, "open_weight": true, "params_total_b": 4.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.016, "providers": [{"provider": "Together", "tag": "together", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.12, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98437622060776, "uptime_1d": 99.97245632878446}], "cheapest": {"provider": "Together", "tag": "together", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.12, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98437622060776, "uptime_1d": 99.97245632878446}}, {"id": "meta-llama/llama-guard-4-12b", "name": "Meta: Llama Guard 4 12B", "hf": "meta-llama/Llama-Guard-4-12B", "context": 163840, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 163840, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.18, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.83663771915452, "uptime_1d": 99.87231927329199}, {"provider": "Together", "tag": "together", "context": 1048576, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.94244808501915}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 163840, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.18, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.83663771915452, "uptime_1d": 99.87231927329199}}, {"id": "qwen/qwen3-30b-a3b", "name": "Qwen: Qwen3 30B A3B", "hf": "Qwen/Qwen3-30B-A3B", "context": 40960, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 20000, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 69.76744186046511, "uptime_1d": 83.58137531703115}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.91809991809993, "uptime_1d": 99.24615614270786}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.52, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99693350301283}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.55, "throughput_tps": null, "latency_ms": null, "uptime_30m": 85.42074363992172, "uptime_1d": 92.89517470881864}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 20000, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 69.76744186046511, "uptime_1d": 83.58137531703115}}, {"id": "qwen/qwen3-8b", "name": "Qwen: Qwen3 8B", "hf": "Qwen/Qwen3-8B", "context": 40960, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.66654316413486, "uptime_1d": 98.91310699468241}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.117, "completion_per_mtok": 0.45499999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.7270423084422, "uptime_1d": 99.80121551822462}], "cheapest": {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.66654316413486, "uptime_1d": 98.91310699468241}}, {"id": "qwen/qwen3-14b", "name": "Qwen: Qwen3 14B", "hf": "Qwen/Qwen3-14B", "context": 40960, "open_weight": true, "params_total_b": 14.0, "params_active_b": 14.0, "kv_gb_per_1k": 0.056, "providers": [{"provider": "NextBit", "tag": "nextbit/int4", "context": 40960, "quantization": "int4", "max_completion_tokens": 40960, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.3702698018372}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.12, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.4122832794593}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.22749999999999998, "completion_per_mtok": 0.9099999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.9156355455568}], "cheapest": {"provider": "NextBit", "tag": "nextbit/int4", "context": 40960, "quantization": "int4", "max_completion_tokens": 40960, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.3702698018372}}, {"id": "qwen/qwen3-32b", "name": "Qwen: Qwen3 32B", "hf": "Qwen/Qwen3-32B", "context": 40960, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "Chutes", "tag": "chutes/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 40960, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 60.63829787234043, "uptime_1d": 80.71855117417905}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99123172362393, "uptime_1d": 99.95532706624392}, {"provider": "Nebius", "tag": "nebius/base", "context": 40960, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95456610631531, "uptime_1d": 99.9501359389284}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.10400000000000001, "completion_per_mtok": 0.41600000000000004, "throughput_tps": null, "latency_ms": null, "uptime_30m": 93.11897446763429, "uptime_1d": 94.96507040214283}, {"provider": "Novita", "tag": "novita/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 20000, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 82.76566757493188, "uptime_1d": 85.58817107028106}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.5700000000000001, "throughput_tps": null, "latency_ms": null, "uptime_30m": 71.65699395949308, "uptime_1d": 61.411808938950074}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 40960, "prompt_per_mtok": 0.29, "completion_per_mtok": 0.59, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.64772042343772}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.84189723320158, "uptime_1d": 99.92557216398933}], "cheapest": {"provider": "Chutes", "tag": "chutes/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 40960, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 60.63829787234043, "uptime_1d": 80.71855117417905}}, {"id": "qwen/qwen3-235b-a22b", "name": "Qwen: Qwen3 235B A22B", "hf": "Qwen/Qwen3-235B-A22B", "context": 131072, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.45499999999999996, "completion_per_mtok": 1.8199999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.45499999999999996, "completion_per_mtok": 1.8199999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "alfredpros/codellama-7b-instruct-solidity", "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", "hf": "AlfredPros/CodeLlama-7b-Instruct-Solidity", "context": 4096, "open_weight": true, "params_total_b": 7.0, "params_active_b": 7.0, "kv_gb_per_1k": 0.112, "providers": [{"provider": "Featherless", "tag": "featherless", "context": 4096, "quantization": "unknown", "max_completion_tokens": 4096, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], "cheapest": {"provider": "Featherless", "tag": "featherless", "context": 4096, "quantization": "unknown", "max_completion_tokens": 4096, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "mistralai/mistral-small-3.1-24b-instruct", "name": "Mistral: Mistral Small 3.1 24B", "hf": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", "context": 128000, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "google/gemma-3-4b-it:free", "name": "Google: Gemma 3 4B (free)", "hf": "google/gemma-3-4b-it", "context": 32768, "open_weight": true, "params_total_b": 4.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.016, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 32768, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.94678020223523}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 32768, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.94678020223523}}, {"id": "google/gemma-3-4b-it", "name": "Google: Gemma 3 4B", "hf": "google/gemma-3-4b-it", "context": 131072, "open_weight": true, "params_total_b": 4.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.016, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99791014520014}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99791014520014}}, {"id": "google/gemma-3-12b-it:free", "name": "Google: Gemma 3 12B (free)", "hf": "google/gemma-3-12b-it", "context": 32768, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.048, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 32768, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.47089947089947, "uptime_1d": 99.71797884841364}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 32768, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.47089947089947, "uptime_1d": 99.71797884841364}}, {"id": "google/gemma-3-12b-it", "name": "Google: Gemma 3 12B", "hf": "google/gemma-3-12b-it", "context": 131072, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.048, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.13, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99274934485152, "uptime_1d": 99.90805156520699}, {"provider": "SambaNova", "tag": "sambanova", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.22, "completion_per_mtok": 0.59, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.12514263978699, "uptime_1d": 99.66111412570517}, {"provider": "Cloudflare", "tag": "cloudflare", "context": 80000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.19082939986514, "uptime_1d": 99.87338893884427}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.13, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99274934485152, "uptime_1d": 99.90805156520699}}, {"id": "google/gemma-3-27b-it:free", "name": "Google: Gemma 3 27B (free)", "hf": "google/gemma-3-27b-it", "context": 131072, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.93304767005891}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.93304767005891}}, {"id": "google/gemma-3-27b-it", "name": "Google: Gemma 3 27B", "hf": "google/gemma-3-27b-it", "context": 131072, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.76609105180533, "uptime_1d": 96.21278190365878}, {"provider": "Novita", "tag": "novita/bf16", "context": 98304, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.119, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 66.57407407407408, "uptime_1d": 84.05480625133805}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 110000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.63198867657466, "uptime_1d": 99.92719063073233}, {"provider": "Phala", "tag": "phala", "context": 53920, "quantization": "unknown", "max_completion_tokens": 53920, "prompt_per_mtok": 0.11, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 90.64837905236908, "uptime_1d": 95.71753033291495}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98462366418083, "uptime_1d": 99.80558315445853}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.76609105180533, "uptime_1d": 96.21278190365878}}, {"id": "thedrummer/skyfall-36b-v2", "name": "TheDrummer: Skyfall 36B V2", "hf": "TheDrummer/Skyfall-36B-v2", "context": 32768, "open_weight": true, "params_total_b": 36.0, "params_active_b": 36.0, "kv_gb_per_1k": 0.5760000000000001, "providers": [{"provider": "Parasail", "tag": "parasail/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.55, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Parasail", "tag": "parasail/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.55, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "meta-llama/llama-guard-3-8b", "name": "Llama Guard 3 8B", "hf": "meta-llama/Llama-Guard-3-8B", "context": 131072, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.48, "completion_per_mtok": 0.03, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.48, "completion_per_mtok": 0.03, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "aion-labs/aion-rp-llama-3.1-8b", "name": "AionLabs: Aion-RP 1.0 (8B)", "hf": "", "context": 32768, "open_weight": false, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "AionLabs", "tag": "aion-labs", "context": 32768, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.5999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "AionLabs", "tag": "aion-labs", "context": 32768, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.5999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "qwen/qwen2.5-vl-72b-instruct", "name": "Qwen: Qwen2.5 VL 72B Instruct", "hf": "Qwen/Qwen2.5-VL-72B-Instruct", "context": 32000, "open_weight": true, "params_total_b": 72.0, "params_active_b": 72.0, "kv_gb_per_1k": 0.28800000000000003, "providers": [{"provider": "Nebius", "tag": "nebius/fp8", "context": 32000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.25, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.45570003023889, "uptime_1d": 99.33195414188431}, {"provider": "Novita", "tag": "novita/bf16", "context": 32768, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.76095617529882, "uptime_1d": 99.64309865504546}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 128000, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.0856102003643, "uptime_1d": 98.58166661669016}], "cheapest": {"provider": "Nebius", "tag": "nebius/fp8", "context": 32000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.25, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.45570003023889, "uptime_1d": 99.33195414188431}}, {"id": "mistralai/mistral-small-24b-instruct-2501", "name": "Mistral: Mistral Small 3", "hf": "mistralai/Mistral-Small-24B-Instruct-2501", "context": 32768, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99961400244722}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99961400244722}}, {"id": "deepseek/deepseek-r1-distill-qwen-32b", "name": "DeepSeek: R1 Distill Qwen 32B", "hf": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "context": 32768, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.29, "completion_per_mtok": 0.29, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.29, "completion_per_mtok": 0.29, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "deepseek/deepseek-r1-distill-llama-70b", "name": "DeepSeek: R1 Distill Llama 70B", "hf": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.7, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.14893617021276, "uptime_1d": 99.67477851295278}, {"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.73049857763138}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.7, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.14893617021276, "uptime_1d": 99.67477851295278}}, {"id": "sao10k/l3.1-70b-hanami-x1", "name": "Sao10K: Llama 3.1 70B Hanami x1", "hf": "Sao10K/L3.1-70B-Hanami-x1", "context": 16000, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "Infermatic", "tag": "infermatic/bf16", "context": 16000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 3.0, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Infermatic", "tag": "infermatic/bf16", "context": 16000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 3.0, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "sao10k/l3.3-euryale-70b", "name": "Sao10K: Llama 3.3 Euryale 70B", "hf": "Sao10K/L3.3-70B-Euryale-v2.3", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "NextBit", "tag": "nextbit/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.65, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.16342412451361, "uptime_1d": 87.91520772215388}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.85, "completion_per_mtok": 0.85, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92634602636812}], "cheapest": {"provider": "NextBit", "tag": "nextbit/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.65, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.16342412451361, "uptime_1d": 87.91520772215388}}, {"id": "meta-llama/llama-3.3-70b-instruct:free", "name": "Meta: Llama 3.3 70B Instruct (free)", "hf": "meta-llama/Llama-3.3-70B-Instruct", "context": 65536, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "Venice", "tag": "venice/fp8", "context": 65536, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 67.36545682102629}], "cheapest": {"provider": "Venice", "tag": "venice/fp8", "context": 65536, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 67.36545682102629}}, {"id": "meta-llama/llama-3.3-70b-instruct", "name": "Meta: Llama 3.3 70B Instruct", "hf": "meta-llama/Llama-3.3-70B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.32, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.37143559488692, "uptime_1d": 98.01275607139937}, {"provider": "Inceptron", "tag": "inceptron/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.12, "completion_per_mtok": 0.38, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.32851985559566, "uptime_1d": 96.96112735464698}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.67809431836471, "uptime_1d": 99.59633005674003}, {"provider": "AkashML", "tag": "akashml/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 128000, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.36109618484686, "uptime_1d": 98.92760663776077}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 120000, "prompt_per_mtok": 0.135, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.47046843177189, "uptime_1d": 99.62453463224054}, {"provider": "Parasail", "tag": "parasail/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.22, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.22170301142263, "uptime_1d": 99.08461520901885}, {"provider": "Friendli", "tag": "friendli", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.6, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.97261024376883}, {"provider": "SambaNova", "tag": "sambanova-turbo", "context": 16000, "quantization": "bf16", "max_completion_tokens": 3072, "prompt_per_mtok": 0.44999999999999996, "completion_per_mtok": 0.8999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 83.9226150767178, "uptime_1d": 93.05003861855897}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.59, "completion_per_mtok": 0.7899999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94692144373673, "uptime_1d": 99.96567409485316}, {"provider": "WandB", "tag": "wandb/fp16", "context": 128000, "quantization": "fp16", "max_completion_tokens": 128000, "prompt_per_mtok": 0.71, "completion_per_mtok": 0.71, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99297890864156}, {"provider": "Google", "tag": "google-vertex", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.72, "completion_per_mtok": 0.72, "throughput_tps": null, "latency_ms": null, "uptime_30m": 90.73569482288828, "uptime_1d": 92.14145383104125}, {"provider": "Google", "tag": "google-vertex", "context": 128000, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.72, "completion_per_mtok": 0.72, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.90590809628009, "uptime_1d": 99.55547148396859}, {"provider": "Together", "tag": "together/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 0.88, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.37888198757764, "uptime_1d": 98.27067669172932}, {"provider": "SambaNova", "tag": "sambanova/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 3072, "prompt_per_mtok": 0.6, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 72.63157894736842, "uptime_1d": 81.90545004128819}, {"provider": "Cloudflare", "tag": "cloudflare/fp8", "context": 24000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.29, "completion_per_mtok": 2.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.33531510107015, "uptime_1d": 96.5827169740661}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.32, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.37143559488692, "uptime_1d": 98.01275607139937}}, {"id": "qwen/qwen-2.5-coder-32b-instruct", "name": "Qwen2.5 Coder 32B Instruct", "hf": "Qwen/Qwen2.5-Coder-32B-Instruct", "context": 32768, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.66, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.66, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "thedrummer/unslopnemo-12b", "name": "TheDrummer: UnslopNemo 12B", "hf": "TheDrummer/UnslopNemo-12B-v4.1", "context": 32768, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.75133474731223}], "cheapest": {"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.75133474731223}}, {"id": "anthracite-org/magnum-v4-72b", "name": "Magnum v4 72B", "hf": "anthracite-org/magnum-v4-72b", "context": 16384, "open_weight": true, "params_total_b": 72.0, "params_active_b": 72.0, "kv_gb_per_1k": 1.1520000000000001, "providers": [{"provider": "Mancer 2", "tag": "mancer/fp8", "context": 16384, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 3.0, "completion_per_mtok": 5.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Mancer 2", "tag": "mancer/fp8", "context": 16384, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 3.0, "completion_per_mtok": 5.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "qwen/qwen-2.5-7b-instruct", "name": "Qwen: Qwen2.5 7B Instruct", "hf": "Qwen/Qwen2.5-7B-Instruct", "context": 32768, "open_weight": true, "params_total_b": 7.0, "params_active_b": 7.0, "kv_gb_per_1k": 0.028, "providers": [{"provider": "Phala", "tag": "phala", "context": 32768, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 20.853310087655576, "uptime_1d": 95.99651272168308}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.75977287617384, "uptime_1d": 99.30429570217598}, {"provider": "Together", "tag": "together/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.70535048117658, "uptime_1d": 99.7318058406728}], "cheapest": {"provider": "Phala", "tag": "phala", "context": 32768, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 20.853310087655576, "uptime_1d": 95.99651272168308}}, {"id": "nvidia/llama-3.1-nemotron-70b-instruct", "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", "hf": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 1.2, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 1.2, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "thedrummer/rocinante-12b", "name": "TheDrummer: Rocinante 12B", "hf": "TheDrummer/Rocinante-12B-v1.1", "context": 32768, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "NextBit", "tag": "nextbit/bf16", "context": 32768, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.16999999999999998, "completion_per_mtok": 0.43, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.7627326651667}, {"provider": "Infermatic", "tag": "infermatic/bf16", "context": 32768, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.25, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.52830188679245, "uptime_1d": 99.98148319600037}], "cheapest": {"provider": "NextBit", "tag": "nextbit/bf16", "context": 32768, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.16999999999999998, "completion_per_mtok": 0.43, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.7627326651667}}, {"id": "meta-llama/llama-3.2-3b-instruct:free", "name": "Meta: Llama 3.2 3B Instruct (free)", "hf": "meta-llama/Llama-3.2-3B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 3.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.012, "providers": [{"provider": "Venice", "tag": "venice/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 55.24475524475524}], "cheapest": {"provider": "Venice", "tag": "venice/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 55.24475524475524}}, {"id": "meta-llama/llama-3.2-3b-instruct", "name": "Meta: Llama 3.2 3B Instruct", "hf": "meta-llama/Llama-3.2-3B-Instruct", "context": 80000, "open_weight": true, "params_total_b": 3.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.012, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 80000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.051, "completion_per_mtok": 0.33999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99869485135171}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 80000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.051, "completion_per_mtok": 0.33999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99869485135171}}, {"id": "meta-llama/llama-3.2-1b-instruct", "name": "Meta: Llama 3.2 1B Instruct", "hf": "meta-llama/Llama-3.2-1B-Instruct", "context": 60000, "open_weight": true, "params_total_b": 1.0, "params_active_b": 1.0, "kv_gb_per_1k": 0.004, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 60000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.027, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 60000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.027, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "meta-llama/llama-3.2-11b-vision-instruct", "name": "Meta: Llama 3.2 11B Vision Instruct", "hf": "meta-llama/Llama-3.2-11B-Vision-Instruct", "context": 131072, "open_weight": true, "params_total_b": 11.0, "params_active_b": 11.0, "kv_gb_per_1k": 0.044, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.245, "completion_per_mtok": 0.245, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.245, "completion_per_mtok": 0.245, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen-2.5-72b-instruct", "name": "Qwen2.5 72B Instruct", "hf": "Qwen/Qwen2.5-72B-Instruct", "context": 32768, "open_weight": true, "params_total_b": 72.0, "params_active_b": 72.0, "kv_gb_per_1k": 0.28800000000000003, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.36, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.971870604782, "uptime_1d": 99.71919789705181}, {"provider": "Novita", "tag": "novita/bf16", "context": 32000, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.38, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.74554707379136}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.36, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.971870604782, "uptime_1d": 99.71919789705181}}, {"id": "sao10k/l3.1-euryale-70b", "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "hf": "Sao10K/L3.1-70B-Euryale-v2.2", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.85, "completion_per_mtok": 0.85, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95469646632438}, {"provider": "Novita", "tag": "novita/fp8", "context": 8192, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 1.48, "completion_per_mtok": 1.48, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.85, "completion_per_mtok": 0.85, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95469646632438}}, {"id": "nousresearch/hermes-3-llama-3.1-70b", "name": "Nous: Hermes 3 70B Instruct", "hf": "NousResearch/Hermes-3-Llama-3.1-70B", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "nousresearch/hermes-3-llama-3.1-405b:free", "name": "Nous: Hermes 3 405B Instruct (free)", "hf": "NousResearch/Hermes-3-Llama-3.1-405B", "context": 131072, "open_weight": true, "params_total_b": 405.0, "params_active_b": 405.0, "kv_gb_per_1k": 1.62, "providers": [{"provider": "Venice", "tag": "venice/beta", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 50.265708033760546}], "cheapest": {"provider": "Venice", "tag": "venice/beta", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 50.265708033760546}}, {"id": "nousresearch/hermes-3-llama-3.1-405b", "name": "Nous: Hermes 3 405B Instruct", "hf": "NousResearch/Hermes-3-Llama-3.1-405B", "context": 131072, "open_weight": true, "params_total_b": 405.0, "params_active_b": 405.0, "kv_gb_per_1k": 1.62, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 1.0, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 1.0, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "sao10k/l3-lunaris-8b", "name": "Sao10K: Llama 3 8B Lunaris", "hf": "Sao10K/L3-8B-Lunaris-v1", "context": 8192, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 8192, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96606142881384, "uptime_1d": 99.96301856973528}, {"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.10600255427842, "uptime_1d": 94.32528743360452}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 8192, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96606142881384, "uptime_1d": 99.96301856973528}}, {"id": "meta-llama/llama-3.1-8b-instruct", "name": "Meta: Llama 3.1 8B Instruct", "hf": "meta-llama/Meta-Llama-3.1-8B-Instruct", "context": 16384, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 16384, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.02, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.69707401032701, "uptime_1d": 97.99223527344239}, {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.02, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.5028961701392, "uptime_1d": 99.6032352450478}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.02, "completion_per_mtok": 0.06, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.64943725454019, "uptime_1d": 99.51845375057525}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98667732480682, "uptime_1d": 99.97299342866116}, {"provider": "Friendli", "tag": "friendli", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8000, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98796820066211}, {"provider": "Cerebras", "tag": "cerebras/fp16", "context": 32768, "quantization": "fp16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 15.491452991452991, "uptime_1d": 14.93305029392554}, {"provider": "Cloudflare", "tag": "cloudflare/fp8", "context": 32000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.29, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.03948772678763, "uptime_1d": 99.90947593029647}, {"provider": "WandB", "tag": "wandb/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": 128000, "prompt_per_mtok": 0.22, "completion_per_mtok": 0.22, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 16384, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.02, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.69707401032701, "uptime_1d": 97.99223527344239}}, {"id": "meta-llama/llama-3.1-70b-instruct", "name": "Meta: Llama 3.1 70B Instruct", "hf": "meta-llama/Meta-Llama-3.1-70B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.15870125929298, "uptime_1d": 99.71119426591363}, {"provider": "DeepInfra", "tag": "deepinfra/base", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.27385570209465, "uptime_1d": 99.54320793325988}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.72, "completion_per_mtok": 0.72, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.78577549271637, "uptime_1d": 97.3890742094681}, {"provider": "WandB", "tag": "wandb/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": 128000, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98133569958702}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.15870125929298, "uptime_1d": 99.71119426591363}}, {"id": "google/gemma-2-27b-it", "name": "Google: Gemma 2 27B", "hf": "google/gemma-2-27b-it", "context": 8192, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "NextBit", "tag": "nextbit/int4", "context": 8192, "quantization": "int4", "max_completion_tokens": 2048, "prompt_per_mtok": 0.65, "completion_per_mtok": 0.65, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "NextBit", "tag": "nextbit/int4", "context": 8192, "quantization": "int4", "max_completion_tokens": 2048, "prompt_per_mtok": 0.65, "completion_per_mtok": 0.65, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "sao10k/l3-euryale-70b", "name": "Sao10k: Llama 3 Euryale 70B v2.1", "hf": "Sao10K/L3-70B-Euryale-v2.1", "context": 8192, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 1.48, "completion_per_mtok": 1.48, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 1.48, "completion_per_mtok": 1.48, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "nousresearch/hermes-2-pro-llama-3-8b", "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", "hf": "NousResearch/Hermes-2-Pro-Llama-3-8B", "context": 8192, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Novita", "tag": "novita/fp16", "context": 8192, "quantization": "fp16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.14, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp16", "context": 8192, "quantization": "fp16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.14, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "meta-llama/llama-3-8b-instruct", "name": "Meta: Llama 3 8B Instruct", "hf": "meta-llama/Meta-Llama-3-8B-Instruct", "context": 8192, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.04, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99594542542624}, {"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.04, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}, {"provider": "Together", "tag": "together/int4", "context": 8192, "quantization": "int4", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.16107382550335, "uptime_1d": 91.95635282591805}, {"provider": "Cloudflare", "tag": "cloudflare", "context": 7968, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.28, "completion_per_mtok": 0.83, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.04, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99594542542624}}, {"id": "meta-llama/llama-3-70b-instruct", "name": "Meta: Llama 3 70B Instruct", "hf": "meta-llama/Meta-Llama-3-70B-Instruct", "context": 8192, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 8192, "quantization": "fp8", "max_completion_tokens": 8000, "prompt_per_mtok": 0.51, "completion_per_mtok": 0.74, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 8192, "quantization": "fp8", "max_completion_tokens": 8000, "prompt_per_mtok": 0.51, "completion_per_mtok": 0.74, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/mixtral-8x22b-instruct", "name": "Mistral: Mixtral 8x22B Instruct", "hf": "mistralai/Mixtral-8x22B-Instruct-v0.1", "context": 65536, "open_weight": true, "params_total_b": 147.84, "params_active_b": 44.0, "kv_gb_per_1k": 0.59136, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 65536, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 2.0, "completion_per_mtok": 6.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 65536, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 2.0, "completion_per_mtok": 6.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "microsoft/wizardlm-2-8x22b", "name": "WizardLM-2 8x22B", "hf": "microsoft/WizardLM-2-8x22B", "context": 65535, "open_weight": true, "params_total_b": 147.84, "params_active_b": 44.0, "kv_gb_per_1k": 2.36544, "providers": [{"provider": "Novita", "tag": "novita/bf16", "context": 65535, "quantization": "bf16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.62, "completion_per_mtok": 0.62, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/bf16", "context": 65535, "quantization": "bf16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.62, "completion_per_mtok": 0.62, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/mixtral-8x7b-instruct", "name": "Mistral: Mixtral 8x7B Instruct", "hf": "mistralai/Mixtral-8x7B-Instruct-v0.1", "context": 32768, "open_weight": true, "params_total_b": 47.04, "params_active_b": 14.0, "kv_gb_per_1k": 0.18816, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.54, "completion_per_mtok": 0.54, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99671538840532}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.54, "completion_per_mtok": 0.54, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99671538840532}}, {"id": "alpindale/goliath-120b", "name": "Goliath 120B", "hf": "alpindale/goliath-120b", "context": 6144, "open_weight": true, "params_total_b": 120.0, "params_active_b": 120.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "Mancer 2", "tag": "mancer/int4", "context": 6144, "quantization": "int4", "max_completion_tokens": 1024, "prompt_per_mtok": 3.75, "completion_per_mtok": 7.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Mancer 2", "tag": "mancer/int4", "context": 6144, "quantization": "int4", "max_completion_tokens": 1024, "prompt_per_mtok": 3.75, "completion_per_mtok": 7.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "mistralai/mistral-7b-instruct-v0.1", "name": "Mistral: Mistral 7B Instruct v0.1", "hf": "mistralai/Mistral-7B-Instruct-v0.1", "context": 2824, "open_weight": true, "params_total_b": 7.0, "params_active_b": 7.0, "kv_gb_per_1k": 0.028, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 2824, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.11, "completion_per_mtok": 0.19, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.35174854768592}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 2824, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.11, "completion_per_mtok": 0.19, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.35174854768592}}, {"id": "undi95/remm-slerp-l2-13b", "name": "ReMM SLERP 13B", "hf": "Undi95/ReMM-SLERP-L2-13B", "context": 6144, "open_weight": true, "params_total_b": 13.0, "params_active_b": 13.0, "kv_gb_per_1k": 0.20800000000000002, "providers": [{"provider": "NextBit", "tag": "nextbit/bf16", "context": 6144, "quantization": "bf16", "max_completion_tokens": 4096, "prompt_per_mtok": 0.44999999999999996, "completion_per_mtok": 0.65, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 96.66506717850288}, {"provider": "Mancer 2", "tag": "mancer/fp8", "context": 6144, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 0.5, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "NextBit", "tag": "nextbit/bf16", "context": 6144, "quantization": "bf16", "max_completion_tokens": 4096, "prompt_per_mtok": 0.44999999999999996, "completion_per_mtok": 0.65, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 96.66506717850288}}, {"id": "gryphe/mythomax-l2-13b", "name": "MythoMax 13B", "hf": "Gryphe/MythoMax-L2-13b", "context": 4096, "open_weight": true, "params_total_b": 13.0, "params_active_b": 13.0, "kv_gb_per_1k": 0.20800000000000002, "providers": [{"provider": "NextBit", "tag": "nextbit/int4", "context": 4096, "quantization": "int4", "max_completion_tokens": 4096, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.06, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.46881135448771}, {"provider": "DeepInfra", "tag": "deepinfra/fp16", "context": 4096, "quantization": "fp16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.91935483870968, "uptime_1d": 99.9922570654278}, {"provider": "Mancer 2", "tag": "mancer/fp8", "context": 8192, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 0.5, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.97910718016576}], "cheapest": {"provider": "NextBit", "tag": "nextbit/int4", "context": 4096, "quantization": "int4", "max_completion_tokens": 4096, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.06, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.46881135448771}}];
const GPUS = [{"name": "RTX 3090", "tier": "consumer", "vram": 24, "bandwidth": 936, "tdp": 350, "msrp": 1499, "street": 700, "released": 2020, "fp8": false, "fp4": false}, {"name": "RTX 4090", "tier": "consumer", "vram": 24, "bandwidth": 1008, "tdp": 450, "msrp": 1599, "street": 1900, "released": 2022, "fp8": true, "fp4": false}, {"name": "RTX 5070 Ti", "tier": "consumer", "vram": 16, "bandwidth": 896, "tdp": 300, "msrp": 749, "street": 900, "released": 2025, "anchor": true, "fp8": true, "fp4": true}, {"name": "RTX 5080", "tier": "consumer", "vram": 16, "bandwidth": 960, "tdp": 360, "msrp": 999, "street": 1200, "released": 2025, "fp8": true, "fp4": true}, {"name": "RTX 5090", "tier": "consumer", "vram": 32, "bandwidth": 1792, "tdp": 575, "msrp": 1999, "street": 2800, "released": 2025, "fp8": true, "fp4": true}, {"name": "RTX Pro 6000 Blackwell", "tier": "workstation", "vram": 96, "bandwidth": 1792, "tdp": 600, "msrp": 8500, "street": 8800, "released": 2025, "fp8": true, "fp4": true}, {"name": "L4", "tier": "datacenter", "vram": 24, "bandwidth": 300, "tdp": 72, "msrp": 2500, "street": 2400, "released": 2023, "fp8": true, "fp4": false}, {"name": "L40S", "tier": "datacenter", "vram": 48, "bandwidth": 864, "tdp": 350, "msrp": 8000, "street": 7500, "released": 2023, "fp8": true, "fp4": false}, {"name": "A100 40GB", "tier": "datacenter", "vram": 40, "bandwidth": 1555, "tdp": 400, "msrp": 10000, "street": 6800, "released": 2020, "fp8": false, "fp4": false}, {"name": "A100 80GB", "tier": "datacenter", "vram": 80, "bandwidth": 2039, "tdp": 400, "msrp": 15000, "street": 11500, "released": 2021, "fp8": false, "fp4": false}, {"name": "H100 80GB SXM", "tier": "datacenter", "vram": 80, "bandwidth": 3350, "tdp": 700, "msrp": 30000, "street": 24000, "released": 2022, "fp8": true, "fp4": false}, {"name": "H200 141GB", "tier": "datacenter", "vram": 141, "bandwidth": 4800, "tdp": 700, "msrp": 32000, "street": 30000, "released": 2024, "fp8": true, "fp4": false}, {"name": "B200 192GB", "tier": "datacenter", "vram": 192, "bandwidth": 8000, "tdp": 1000, "msrp": 40000, "street": 38000, "released": 2025, "fp8": true, "fp4": true}];
const FRONTIER = [{"label": "OpenAI GPT-5.x", "model": "OpenAI: GPT-5 Nano", "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.39999999999999997}, {"label": "Claude Opus", "model": "Anthropic: Claude Opus 4.7", "prompt_per_mtok": 5.0, "completion_per_mtok": 25.0}, {"label": "Claude Sonnet", "model": "Anthropic: Claude Sonnet 4.6", "prompt_per_mtok": 3.0, "completion_per_mtok": 15.0}, {"label": "Gemini 2.x Pro", "model": "Google: Gemini 2.5 Pro", "prompt_per_mtok": 1.25, "completion_per_mtok": 10.0}, {"label": "Grok", "model": "xAI: Grok 4.1 Fast", "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.5}];
const ENGINES = {"engines": [{"name": "llama.cpp", "single_stream": 1.0, "batched": 1.0, "hardware": "All (NVIDIA, AMD, Apple, CPU)", "use_case": "Single-user; broadest hardware support; reference baseline.", "notes": "User's anchor. Sequential queueing \u2014 does not scale with concurrency.", "source": "jan.ai TRT-LLM bench: 100 t/s on 4090"}, {"name": "Ollama", "single_stream": 0.95, "batched": 0.3, "hardware": "All", "use_case": "Convenience wrapper around llama.cpp.", "notes": "Same single-stream as llama.cpp; collapses under concurrent load (no continuous batching).", "source": "sitepoint Ollama vs vLLM 2026"}, {"name": "ExLlamaV2", "single_stream": 1.3, "batched": 1.5, "hardware": "NVIDIA only", "use_case": "Single-user enthusiast; consistent single-stream leader on consumer NVIDIA.", "notes": "EXL2 mixed-bpw quantization. 7B 139 t/s on 4090 (~1.3x llama.cpp).", "source": "turboderp/exllama #16"}, {"name": "ExLlamaV3", "single_stream": 1.35, "batched": 1.6, "hardware": "NVIDIA only", "use_case": "EXL2 successor; better low-bpw quality.", "notes": "Estimated; comparable to ExLlamaV2 on speed.", "source": "turboderp-org/exllamav3"}, {"name": "vLLM", "single_stream": 1.05, "batched": 8.0, "hardware": "NVIDIA, AMD ROCm, Gaudi, TPU", "use_case": "Multi-user serving \u2014 de facto open-source production engine.", "notes": "PagedAttention + continuous batching. ~tied with llama.cpp at batch=1; 35-44x ahead under load.", "source": "Red Hat vLLM vs llama.cpp 2025"}, {"name": "SGLang", "single_stream": 1.1, "batched": 10.0, "hardware": "NVIDIA, AMD ROCm", "use_case": "Serving with prefix caching wins (RAG, multi-turn, agents).", "notes": "RadixAttention. ~29% throughput edge over vLLM on H100; up to 6.4x on prefix-heavy workloads.", "source": "premai.io 2026 bench"}, {"name": "TensorRT-LLM", "single_stream": 1.7, "batched": 10.0, "hardware": "NVIDIA only (best on Hopper/Blackwell)", "use_case": "Lowest-latency single-user OR highest-throughput serving on NVIDIA.", "notes": "70% faster than llama.cpp on 4090 (170 vs 100 t/s). Compile-time + ops complexity costs.", "source": "jan.ai TRT-LLM bench"}, {"name": "MLC-LLM", "single_stream": 1.2, "batched": 1.5, "hardware": "NVIDIA, AMD, Apple, Vulkan, WebGPU, mobile", "use_case": "Cross-platform single-user; best-in-class for mobile/Web.", "notes": "TVM-compiled. Strong on non-NVIDIA targets. Multiplier estimated.", "source": "MLC project leaderboard"}, {"name": "LMDeploy", "single_stream": 1.4, "batched": 9.0, "hardware": "NVIDIA only", "use_case": "Production serving \u2014 strong for INT4 large models.", "notes": "TurboMind C++ engine. ~28% faster than vLLM on H100. 2.4x INT4 vs FP16.", "source": "premai.io 2026 bench"}, {"name": "Aphrodite", "single_stream": 1.05, "batched": 8.0, "hardware": "NVIDIA, AMD ROCm", "use_case": "vLLM fork with broader quant support (EXL2 + GGUF + AWQ + GPTQ).", "notes": "Tracks vLLM closely; main draw is format compatibility.", "source": "github.com/aphrodite-engine"}, {"name": "TGI (HF)", "single_stream": 1.0, "batched": 6.0, "hardware": "NVIDIA, AMD, Gaudi, Inferentia", "use_case": "HF's serving engine; vLLM competitor.", "notes": "Built-in speculative decoding (Medusa, n-gram). Slightly behind vLLM under load.", "source": "marktechpost 2025-11"}, {"name": "HF Transformers", "single_stream": 0.4, "batched": 0.5, "hardware": "NVIDIA, AMD, CPU", "use_case": "Reference / research only \u2014 not for production.", "notes": "Eager-mode PyTorch, no kernel fusion. ~2-3x slower than llama.cpp at batch=1.", "source": "HF community consensus"}], "speculative": {"default_speedup": 1.15, "best_case": 2.5, "notes": "Highly workload-dependent. Code/structured-output with vocab-matched draft: up to 2.5x. MoE on consumer Ampere/Ada: often 0.9-1.0x (slowdown reported). Default 1.15x average."}, "fp8_note": "On Hopper (H100/H200): native FP8 \u2248 2x FP16 throughput, quality \u2248 BF16. On Blackwell (5090/B200): native FP4 \u2248 2x FP8 again. On Ampere/Ada: Q4 weight-only is the speed champion at batch=1 (no FP8/FP4 tensor cores). At batch=1 single-stream is memory-bandwidth-bound, so Q4\u2248FP4\u2248FP8\u2248INT8 within \u00b120% on the same GPU.", "batch_caveat": "API providers run batches of 64-256 with continuous batching. Their per-token unit economics are 5-15x better than your batch=1 local number. Comparing single-user local TPS to API price systematically makes local look worse than it is. Use 'Batched serving' mode if you'll concurrent-serve."};
const QUANTS = [{"id": "FP16", "label": "FP16", "bpp": 2.0, "format": "native", "engines": ["all"], "quality": "lossless", "needs_fp8": false, "needs_fp4": false}, {"id": "FP8", "label": "FP8", "bpp": 1.0, "format": "native", "engines": ["vLLM", "TensorRT-LLM", "SGLang", "LMDeploy", "TGI (HF)"], "quality": "near-lossless", "needs_fp8": true, "needs_fp4": false}, {"id": "NVFP4", "label": "NVFP4", "bpp": 0.5, "format": "native", "engines": ["TensorRT-LLM", "vLLM"], "quality": "good", "needs_fp8": false, "needs_fp4": true}, {"id": "Q8_0", "label": "Q8_0 (GGUF)", "bpp": 1.0625, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "near-lossless", "needs_fp8": false, "needs_fp4": false}, {"id": "Q6_K", "label": "Q6_K (GGUF)", "bpp": 0.8125, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "near-lossless", "needs_fp8": false, "needs_fp4": false}, {"id": "Q5_K_M", "label": "Q5_K_M (GGUF)", "bpp": 0.6875, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "Q4_K_M", "label": "Q4_K_M (GGUF) \u00b7 default", "bpp": 0.5625, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "Q4_K_S", "label": "Q4_K_S (GGUF)", "bpp": 0.5, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "Q3_K_M", "label": "Q3_K_M (GGUF)", "bpp": 0.4375, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "lossy", "needs_fp8": false, "needs_fp4": false}, {"id": "AWQ-INT4", "label": "AWQ INT4", "bpp": 0.5, "format": "AWQ", "engines": ["vLLM", "TGI (HF)", "Aphrodite", "LMDeploy", "SGLang"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "GPTQ-INT4", "label": "GPTQ INT4", "bpp": 0.5, "format": "GPTQ", "engines": ["vLLM", "TGI (HF)", "Aphrodite", "ExLlamaV2", "ExLlamaV3"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "EXL2-4.0", "label": "EXL2 4.0bpw", "bpp": 0.5, "format": "EXL2", "engines": ["ExLlamaV2", "ExLlamaV3", "Aphrodite"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "EXL2-5.0", "label": "EXL2 5.0bpw", "bpp": 0.625, "format": "EXL2", "engines": ["ExLlamaV2", "ExLlamaV3", "Aphrodite"], "quality": "near-lossless", "needs_fp8": false, "needs_fp4": false}, {"id": "EXL2-6.0", "label": "EXL2 6.0bpw", "bpp": 0.75, "format": "EXL2", "engines": ["ExLlamaV2", "ExLlamaV3", "Aphrodite"], "quality": "near-lossless", "needs_fp8": false, "needs_fp4": false}];
const ANCHOR_EFF = 0.621512;
const S = {
tab: "model",
modelId: null,
gpuName: "RTX 5070 Ti",
mode: "payback",
quant: "Q4_K_M",
kvQuant: localStorage.getItem("kv_quant") || "Q8",
price: "street", // "street" | "msrp" | "custom"
engine: "llama.cpp",
conc: "single",
priceOverrides: JSON.parse(localStorage.getItem("gpu_price_overrides") || "{}"),
apiPriceOverride: null,
usage: JSON.parse(localStorage.getItem("or_usage_v1") || "null"),
budget: parseFloat(localStorage.getItem("budget") || "1000"),
includeDC: localStorage.getItem("include_dc") === "true",
};
function persistBudget() { localStorage.setItem("budget", String(S.budget)); }
function persistDC() { localStorage.setItem("include_dc", String(S.includeDC)); }
function visibleGpus() {
return GPUS.filter(g => S.includeDC || g.tier !== "datacenter");
}
function persistOverrides() { localStorage.setItem("gpu_price_overrides", JSON.stringify(S.priceOverrides)); }
const $ = (id) => document.getElementById(id);
const KV_BYTES = {FP16: 2.0, Q8: 1.0, Q4: 0.5};
const FRAMEWORK_OVERHEAD_GB = 1.0; // CUDA context, activations, framework buffers
// ============================ math
function currentQuant() { return QUANTS.find(q => q.id === S.quant) || QUANTS.find(q => q.id === "Q4_K_M"); }
function currentEngine() { return ENGINES.engines.find(e => e.name === S.engine) || ENGINES.engines[0]; }
function engineMult() { const e = currentEngine(); return S.conc === "single" ? e.single_stream : e.batched; }
function spec() { return parseFloat($("spec").value); }
function ioRatio() { return parseFloat($("io").value); }
function years() { return parseFloat($("years").value); }
function util() { return parseFloat($("util").value); }
function kwh() { return parseFloat($("kwh").value); }
function minTps() { return parseInt($("mintps").value); }
function tpd() { const exp = parseFloat($(activeTpdInput()).value); return Math.pow(10, exp); }
function activeTpdInput() { return S.tab === "model" ? "tpd" : S.tab === "gpu" ? "tpd2" : "tpd3"; }
function ctxTokens(model) {
const id = S.tab === "gpu" ? "ctx2" : "ctx";
const el = $(id);
if (!el) return 8192;
const exp = parseFloat(el.value);
let n = Math.pow(2, exp);
if (model && model.context) n = Math.min(n, model.context);
return Math.round(n);
}
function weightsGB(model) {
if (!model.params_total_b) return 0;
return model.params_total_b * currentQuant().bpp;
}
function kvGB(model, ctx, kvQuant) {
if (!model.kv_gb_per_1k) return 0;
const q = kvQuant || S.kvQuant;
return (model.kv_gb_per_1k * (ctx / 1000)) * (KV_BYTES[q] / 2.0);
}
function vramNeed(model, ctx, kvQuant) {
return weightsGB(model) + kvGB(model, ctx ?? ctxTokens(model), kvQuant) + FRAMEWORK_OVERHEAD_GB;
}
// Engine ↔ quant compatibility
function engineSupportsQuant(engineName, quantId) {
const q = QUANTS.find(x => x.id === quantId);
if (!q) return true;
if (q.engines.includes("all")) return true;
return q.engines.includes(engineName);
}
function gpuSupportsQuant(gpu, quantId) {
const q = QUANTS.find(x => x.id === quantId);
if (!q) return true;
if (q.needs_fp8 && !gpu.fp8) return false;
if (q.needs_fp4 && !gpu.fp4) return false;
return true;
}
// Returns {ok, reason, kv_used, downgraded, ...}.
// Auto-falls-back from user's KV quant → Q8 → Q4 to find a fit.
function fitCheck(model, gpu, opts = {}) {
const ctx = opts.ctx ?? ctxTokens(model);
if (!gpuSupportsQuant(gpu, S.quant)) return {ok:false, reason:"quant-gpu", detail:`${gpu.name} lacks ${S.quant} support`};
if (!engineSupportsQuant(S.engine, S.quant)) return {ok:false, reason:"quant-engine", detail:`${S.engine} doesn't support ${S.quant}`};
const w = weightsGB(model);
if (w + FRAMEWORK_OVERHEAD_GB > gpu.vram) return {ok:false, reason:"weights", detail:`weights ${w.toFixed(1)}GB + overhead > ${gpu.vram}GB`};
// KV fallback ladder: user's choice → Q8 → Q4. Skip duplicates.
const ladder = [S.kvQuant, "Q8", "Q4"].filter((v, i, a) => a.indexOf(v) === i);
let chosenKv = null, chosenKvBytes = 0;
for (const kvq of ladder) {
const kv = kvGB(model, ctx, kvq);
if (w + kv + FRAMEWORK_OVERHEAD_GB <= gpu.vram) { chosenKv = kvq; chosenKvBytes = kv; break; }
}
if (chosenKv === null) {
// Even with Q4 KV doesn't fit
const minKv = kvGB(model, ctx, "Q4");
const ctxFitTokens = Math.max(0, (gpu.vram - w - FRAMEWORK_OVERHEAD_GB) * 1000 / (model.kv_gb_per_1k * KV_BYTES["Q4"] / 2));
return {ok:false, reason:"kv", detail:`even with Q4 KV (${minKv.toFixed(1)}GB) at ${(ctx/1024).toFixed(0)}K context > ${gpu.vram}GB · max ≈ ${(ctxFitTokens/1024).toFixed(0)}K`};
}
const tps = tpsFor(model, gpu);
if (minTps() > 0 && tps < minTps()) return {ok:false, reason:"tps", tps, detail:`${tps.toFixed(0)} t/s < target ${minTps()} t/s`};
return {
ok: true,
tps,
vram_used: w + chosenKvBytes + FRAMEWORK_OVERHEAD_GB,
kv_used: chosenKv,
downgraded: chosenKv !== S.kvQuant,
};
}
function fits(model, gpu) { return fitCheck(model, gpu).ok; }
function tpsFor(model, gpu) {
if (!model.params_active_b) return 0;
const base = ANCHOR_EFF * gpu.bandwidth / (model.params_active_b * currentQuant().bpp);
return base * engineMult() * spec();
}
function gpuPrice(gpu) {
const o = S.priceOverrides[gpu.name];
if (typeof o === "number" && o > 0) return o;
return gpu[S.price] || gpu.street;
}
function priceLabel(gpu) {
const o = S.priceOverrides[gpu.name];
if (typeof o === "number" && o > 0) return "yours";
return S.price;
}
function apiPerToken(model) {
// Cost per OUTPUT token, including its proportional share of input tokens.
// Total cost = outTok × outC + (outTok × ioRatio) × inC = outTok × (outC + ioRatio × inC)
if (!model.cheapest) return null;
const r = ioRatio();
const inC = model.cheapest.prompt_per_mtok / 1e6;
const outC = model.cheapest.completion_per_mtok / 1e6;
return outC + r * inC;
}
function dailyApiCost(model) { const p = apiPerToken(model); return p === null ? 0 : p * tpd(); }
function dailyApiBreakdown(model) {
if (!model.cheapest) return null;
const r = ioRatio();
const out = tpd();
const inn = out * r;
const inC = model.cheapest.prompt_per_mtok / 1e6;
const outC = model.cheapest.completion_per_mtok / 1e6;
return {
out, inn,
in_cost: inn * inC,
out_cost: out * outC,
total: inn * inC + out * outC,
in_per_mtok: model.cheapest.prompt_per_mtok,
out_per_mtok: model.cheapest.completion_per_mtok,
};
}
function dailyPower(gpu) { return (gpu.tdp / 1000) * util() * kwh() * 24; }
function dailyAmort(gpu) { return gpuPrice(gpu) / (years() * 365); }
function paybackDays(model, gpu) {
// Days to recoup hardware sticker price from net daily savings (API avoided − electricity).
// Excludes amortization (that's the question — when does sticker price equal what you saved).
// If electricity alone exceeds API savings, GPU never pays back.
const p = apiPerToken(model);
if (!p || p <= 0) return Infinity;
const tpsCap = tpsFor(model, gpu) * 86400;
const usedTokens = Math.min(tpd(), tpsCap);
const dailyNet = (p * usedTokens) - dailyPower(gpu);
if (dailyNet <= 0) return Infinity;
return gpuPrice(gpu) / dailyNet;
}
function dailyTcoSavings(model, gpu) {
const tpsCap = tpsFor(model, gpu) * 86400;
const used = Math.min(tpd(), tpsCap);
const apiSpend = (apiPerToken(model) || 0) * used;
const gpuCost = dailyAmort(gpu) + dailyPower(gpu);
return apiSpend - gpuCost;
}
function fmtTok(n) {
if (!isFinite(n) || n < 0) return "—";
if (n >= 1e12) return (n/1e12).toFixed(1)+"T";
if (n >= 1e9) return (n/1e9).toFixed(1)+"B";
if (n >= 1e6) return (n/1e6).toFixed(1)+"M";
if (n >= 1e3) return (n/1e3).toFixed(1)+"K";
return n.toFixed(0);
}
function fmtMoney(n) {
if (!isFinite(n)) return "$∞";
const sign = n < 0 ? "-" : "";
n = Math.abs(n);
if (n >= 1000) return sign + "$" + n.toLocaleString(undefined, {maximumFractionDigits: 0});
if (n >= 10) return sign + "$" + Math.round(n).toLocaleString();
if (n >= 1) return sign + "$" + n.toFixed(2);
if (n >= 0.001) return sign + "$" + n.toFixed(4);
return sign + "$" + n.toExponential(1);
}
function fmtDays(n) {
if (!isFinite(n) || n < 0) return "never";
if (n < 1) return (n*24).toFixed(1) + " hours";
if (n < 60) return Math.round(n) + " days";
if (n < 730) return (n/30).toFixed(1) + " months";
return (n/365).toFixed(1) + " years";
}
// ============================ tabs
function setTab(name) {
S.tab = name;
document.querySelectorAll(".tab-btn").forEach(b => b.dataset.active = b.dataset.tab === name);
document.querySelectorAll("[data-pane]").forEach(p => p.classList.toggle("hidden", p.dataset.pane !== name));
if (name === "browse") {
syncSlider("tpd3", "tpd-val3");
renderBrowse();
} else if (name === "model") {
syncSlider("tpd", "tpd-val");
renderModelTab();
} else if (name === "gpu") {
syncSlider("tpd2", "tpd-val2");
renderGpuTab();
} else if (name === "usage") {
renderUsageTab();
}
}
function syncSlider(id, valId) { $(valId).textContent = fmtTok(Math.pow(10, parseFloat($(id).value))); }
function fmtCtx(t) { return t >= 1024 ? (t/1024).toFixed(t >= 10240 ? 0 : 1) + "K" : t; }
function renderAllValueLabels() {
syncSlider("tpd", "tpd-val");
syncSlider("tpd2", "tpd-val2");
syncSlider("tpd3", "tpd-val3");
$("vram-val").textContent = $("vram").value + " GB";
$("years-val").textContent = years() + "y";
$("util-val").textContent = (util()*100).toFixed(0) + "%";
$("kwh-val").textContent = "$" + kwh().toFixed(3);
$("io-val").textContent = ioRatio() + ":1";
$("spec-val").textContent = spec().toFixed(2) + "x";
$("mintps-val").textContent = minTps() === 0 ? "off" : minTps() + " t/s";
const ctxV = Math.round(Math.pow(2, parseFloat($("ctx").value)));
const ctxV2 = Math.round(Math.pow(2, parseFloat($("ctx2").value)));
$("ctx-val").textContent = fmtCtx(ctxV);
$("ctx-val2").textContent = fmtCtx(ctxV2);
const e = currentEngine();
$("engine-help").textContent = `${e.use_case} · ${S.conc === "single" ? e.single_stream : e.batched}× vs llama.cpp`;
const q = currentQuant();
$("quant-help").textContent = `${q.format} · ${q.bpp.toFixed(2)} B/param · ${q.quality} · runs on: ${q.engines.join(", ")}`;
// Settings strip on model tab
if ($("settings-strip")) {
$("settings-strip").textContent = `${e.name} · ${q.label} weights · ${S.kvQuant} KV · ≥${minTps()} t/s · ${S.price} prices · ${S.conc === "single" ? "single-stream" : "batched"}`;
}
}
// ============================ MODEL TAB
function setupModelCombobox() {
const input = $("model-search");
const list = $("model-results");
const open = () => list.classList.remove("hidden");
const close = () => list.classList.add("hidden");
function renderResults(q) {
q = q.toLowerCase().trim();
const candidates = DATA
.filter(r => r.cheapest && r.params_total_b)
.filter(r => !q || (r.name||"").toLowerCase().includes(q) || r.id.toLowerCase().includes(q))
.sort((a, b) => {
// Prefer open-weight, then smaller params (fits more GPUs)
if (a.open_weight !== b.open_weight) return b.open_weight - a.open_weight;
return a.params_total_b - b.params_total_b;
})
.slice(0, 30);
list.innerHTML = candidates.map(r => `
<button data-mid="${r.id}" class="w-full text-left px-4 py-2 hover:bg-white/5 flex items-center justify-between gap-3">
<div class="min-w-0">
<div class="text-sm truncate">${r.name}</div>
<div class="text-[11px] text-zinc-500 mono truncate">${r.id} · ${r.params_total_b}B${r.params_active_b !== r.params_total_b ? " (A"+r.params_active_b+"B)" : ""}</div>
</div>
<div class="text-right text-[11px] mono shrink-0">
<div>$${(r.cheapest.prompt_per_mtok).toFixed(2)} / $${(r.cheapest.completion_per_mtok).toFixed(2)}</div>
<div class="text-zinc-500">${r.open_weight ? "open" : "closed"} · ${r.providers.length}p</div>
</div>
</button>
`).join("") || `<div class="px-4 py-3 text-sm text-zinc-500">no matches</div>`;
}
// Bind ONCE via delegation — survives renderResults() rebuilds.
list.addEventListener("mousedown", e => {
const b = e.target.closest("button[data-mid]");
if (!b) return;
e.preventDefault(); // beat input blur
S.modelId = b.dataset.mid;
const r = DATA.find(x => x.id === S.modelId);
input.value = r.name;
$("model-meta").textContent = `${r.params_total_b}B${r.params_active_b !== r.params_total_b ? " (A"+r.params_active_b+"B)" : ""} · ${r.open_weight ? "open" : "closed"}`;
close();
renderModelTab();
});
input.addEventListener("focus", () => { renderResults(input.value); open(); });
input.addEventListener("input", () => { renderResults(input.value); open(); });
document.addEventListener("mousedown", e => { if (!e.target.closest('[data-target="model-combo"]')) close(); });
// Default selection: Qwen3.5 9B if found, else first open model
const defaultPick = DATA.find(r => r.id === "qwen/qwen3.5-9b") || DATA.find(r => r.open_weight && r.cheapest && r.params_total_b);
if (defaultPick) {
S.modelId = defaultPick.id;
input.value = defaultPick.name;
$("model-meta").textContent = `${defaultPick.params_total_b}B · ${defaultPick.open_weight ? "open" : "closed"}`;
}
}
function classifyVerdict(payback_days) {
// GPU FITS the model. Tier reflects economic case only.
if (!isFinite(payback_days) || payback_days <= 0) return {tier:"info", label:"NO PAYBACK"};
if (payback_days <= 365) return {tier:"buy", label:"BUY"};
if (payback_days <= 365*3) return {tier:"maybe", label:"MAYBE"};
return {tier:"info", label:"SLOW"};
}
function failBadge(reason) {
const map = {
"weights": {label: "weights too big", color: "bg-rose-500/15 text-rose-300"},
"kv": {label: "context too big", color: "bg-amber-500/15 text-amber-300"},
"quant-gpu": {label: "quant unsupported", color: "bg-rose-500/15 text-rose-300"},
"quant-engine":{label: "engine mismatch", color: "bg-rose-500/15 text-rose-300"},
"tps": {label: "below TPS target", color: "bg-amber-500/15 text-amber-300"},
};
return map[reason] || {label: "won't fit", color: "bg-rose-500/15 text-rose-300"};
}
function paybackPhrase(pb) {
return isFinite(pb) ? `${fmtDays(pb)} payback` : `doesn't pay back at this usage`;
}
function isMainstream(r) {
return r.providers.length >= 2 && !r.id.includes(":") && !/solidity|roleplay|erp|nsfw|uncensored/i.test(r.id);
}
function pillClass(tier) {
return ({
buy: "bg-emerald-500/15 text-emerald-300",
maybe: "bg-amber-500/15 text-amber-300",
info: "bg-zinc-700/40 text-zinc-300",
skip: "bg-rose-500/15 text-rose-300",
})[tier] || "bg-zinc-700/40 text-zinc-300";
}
const _ignored_old_renderTradeoffs = function (best, allFitting, m) {
// Cheaper: only if strictly cheaper than best AND in budget AND fits
const strictlyCheaper = best ? allFitting.filter(s => s.price < best.price).sort((a,b) => a.price - b.price)[0] : null;
// Headroom: next tier up by VRAM (or price if same VRAM); flag over-budget separately
const headroom = best
? (allFitting.find(s => s.g.vram > best.g.vram) || allFitting.find(s => s.price > best.price))
: allFitting[1];
const card = (title, subtitle, accent, body) => `
<div class="rounded-2xl bg-zinc-900/30 grad-border glow p-4 space-y-2">
<div class="text-[10px] uppercase tracking-[.18em] ${accent}">${title}</div>
<div class="text-sm font-medium">${subtitle}</div>
<div class="text-[12px] text-zinc-400 leading-relaxed">${body}</div>
</div>`;
let html = "";
if (strictlyCheaper) {
const overBudget = strictlyCheaper.price > S.budget;
const savings = best.price - strictlyCheaper.price;
html += card(
"Cheaper alternative",
`${strictlyCheaper.g.name} · ${fmtMoney(strictlyCheaper.price)}`,
"text-emerald-300/80",
`Saves ${fmtMoney(savings)} upfront · ${paybackPhrase(strictlyCheaper.pb)} · ${strictlyCheaper.tps.toFixed(0)} tok/s · ${strictlyCheaper.g.vram}GB${overBudget ? ` · <span class="text-amber-300">over budget</span>` : ""}`
);
}
if (headroom && headroom !== strictlyCheaper && (!best || headroom.g !== best.g)) {
const overBudget = headroom.price > S.budget;
const extraVram = best ? headroom.g.vram - best.g.vram : 0;
const extraTps = best ? headroom.tps - best.tps : 0;
const extraCost = best ? headroom.price - best.price : 0;
html += card(
"More headroom · future-proof",
`${headroom.g.name} · ${fmtMoney(headroom.price)}`,
"text-indigo-300/80",
`${extraVram > 0 ? `+${extraVram}GB VRAM · ` : ""}${extraTps > 0 ? `+${extraTps.toFixed(0)} tok/s · ` : ""}${paybackPhrase(headroom.pb)}${extraCost > 0 ? ` · ${fmtMoney(extraCost)} more` : ""}${overBudget ? ` · <span class="text-amber-300">over budget</span>` : ""}`
);
}
html += `
<button id="already-own-btn" class="text-left rounded-2xl bg-zinc-900/30 grad-border glow p-4 space-y-2 hover:bg-zinc-900/60 transition">
<div class="text-[10px] uppercase tracking-[.18em] text-zinc-400">Already own a GPU?</div>
<div class="text-sm font-medium">Pick yours →</div>
<div class="text-[12px] text-zinc-500">See payback at any price you paid.</div>
</button>`;
return html;
}
function renderModelTab() {
if (!S.modelId) return;
const m = DATA.find(r => r.id === S.modelId);
if (!m) return;
// Cap context slider to model's max
const ctxEl = $("ctx");
const wantedCtx = ctxEl.value === "max" ? m.context : Math.pow(2, parseFloat(ctxEl.value));
const overMax = m.context && wantedCtx > m.context;
$("ctx-warn").classList.toggle("hidden", !overMax);
const ctx = ctxTokens(m);
const meta = `${m.params_total_b}B${m.params_active_b !== m.params_total_b ? " (A"+m.params_active_b+"B)" : ""} · ${m.open_weight ? "open" : "closed"} · max ${(m.context/1024).toFixed(0)}K · weights ${weightsGB(m).toFixed(1)}GB · KV@${(ctx/1024).toFixed(0)}K ${kvGB(m, ctx).toFixed(1)}GB`;
$("model-meta").textContent = meta;
// Score every visible GPU
const scored = visibleGpus().map(g => {
const f = fitCheck(m, g, {ctx});
const tps = tpsFor(m, g);
const pb = f.ok ? paybackDays(m, g) : Infinity;
const dailySave = f.ok ? dailyTcoSavings(m, g) : 0;
return { g, f, tps, pb, dailySave, price: gpuPrice(g), netyr: dailySave * 365 };
});
const fitGpus = scored.filter(s => s.f.ok);
const failGpus = scored.filter(s => !s.f.ok);
const inBudget = fitGpus.filter(s => s.price <= S.budget);
const overBudget = fitGpus.filter(s => s.price > S.budget);
if ($("budget-fits-count")) $("budget-fits-count").textContent = inBudget.length;
$("gpu-count").textContent = `${fitGpus.length} fit · ${failGpus.length} won't · ${inBudget.length} in budget`;
// Hide the "show fails" toggle when no failures
if ($("show-fails-label")) $("show-fails-label").style.display = failGpus.length === 0 ? "none" : "";
if ($("show-fails-text")) $("show-fails-text").textContent = `show ${failGpus.length} GPU${failGpus.length === 1 ? "" : "s"} that don't fit`;
// ─────────── Verdict strip (single row) ───────────
const cheapestFitting = fitGpus.slice().sort((a,b) => a.price - b.price)[0];
const cheapestInBudget = inBudget.slice().sort((a,b) => a.price - b.price)[0];
const verdictPick = cheapestInBudget || cheapestFitting;
if (!verdictPick) {
$("verdict-model").className = "rounded-xl border-2 grad-border glow px-5 py-4 verdict-skip flex items-center justify-between gap-4 flex-wrap";
const reasons = [...new Set(failGpus.map(s => s.f.reason))];
const hint = reasons.includes("kv") ? "Try shorter context or Q8 KV cache." :
reasons.includes("weights") ? "Try a smaller quant (Q4_K_S / Q3_K_M)." :
reasons.includes("quant-engine") ? `${S.engine} doesn't support ${S.quant}.` :
reasons.includes("tps") ? "Lower the min-TPS target." :
"No path to fit on consumer hardware.";
$("verdict-model").innerHTML = `
<div class="space-y-1">
<div class="text-[10px] uppercase tracking-[.18em] text-rose-300/80">Won't fit</div>
<div class="text-xl font-semibold">Can't run ${m.name} locally with current settings</div>
</div>
<div class="text-[12px] text-zinc-400">${hint}</div>`;
} else {
const v = classifyVerdict(verdictPick.pb);
const overBudgetFlag = !cheapestInBudget;
const overage = overBudgetFlag ? verdictPick.price - S.budget : 0;
const headroom = !overBudgetFlag ? S.budget - verdictPick.price : 0;
const tier = overBudgetFlag ? 'maybe' : v.tier;
const accentColor = {buy:'text-emerald-300/80', maybe:'text-amber-300/80', info:'text-zinc-400', skip:'text-rose-300/80'}[tier];
const headline = overBudgetFlag ? "Over budget · cheapest that fits"
: v.tier === "buy" ? "Cheapest GPU that pays back fast"
: v.tier === "maybe" ? "Cheapest GPU · pays back over years"
: "Cheapest GPU that fits · won't pay back at this usage";
const bd = dailyApiBreakdown(m);
$("verdict-model").className = `rounded-xl border-2 grad-border glow px-5 py-4 verdict-${tier} space-y-2`;
$("verdict-model").innerHTML = `
<div class="flex items-center justify-between gap-4 flex-wrap">
<div class="flex items-baseline gap-3 flex-wrap">
<div class="text-[10px] uppercase tracking-[.18em] ${accentColor}">${headline}</div>
<div class="text-2xl font-semibold tracking-tight">${verdictPick.g.name}</div>
<div class="text-[12px] text-zinc-400 mono">
<span data-edit-gpu-price="${verdictPick.g.name}" class="cursor-pointer underline decoration-dotted decoration-zinc-600 underline-offset-4 hover:text-indigo-300">${fmtMoney(verdictPick.price)}</span>
${S.priceOverrides[verdictPick.g.name] !== undefined ? ` <span class="pill bg-indigo-500/20 text-indigo-200">custom</span>` : ""}
· ${verdictPick.tps.toFixed(0)} tok/s
· ${verdictPick.f.vram_used.toFixed(1)}/${verdictPick.g.vram} GB
${overBudgetFlag ? ` · <span class="text-amber-300">${fmtMoney(overage)} over budget</span>` : headroom > 0 ? ` · <span class="text-zinc-500">${fmtMoney(headroom)} under $${S.budget.toLocaleString()} budget</span>` : ""}
</div>
</div>
<div class="text-right text-[12px] mono">
<div class="${verdictPick.netyr > 0 ? 'text-emerald-300' : 'text-rose-300'} font-medium">${verdictPick.netyr > 0 ? "saves" : "loses"} ${fmtMoney(Math.abs(verdictPick.netyr))}/yr</div>
<div class="text-zinc-500">${paybackPhrase(verdictPick.pb)}</div>
</div>
</div>
${bd ? `<div class="pt-2 border-t border-white/5 grid grid-cols-2 md:grid-cols-4 gap-2 text-[11px] mono">
<div><span class="text-zinc-500">API price</span> $${bd.in_per_mtok.toFixed(2)} in / $${bd.out_per_mtok.toFixed(2)} out per Mtok</div>
<div><span class="text-zinc-500">API spend / day</span> <span class="text-rose-300">${fmtMoney(bd.total)}</span></div>
<div><span class="text-zinc-500">GPU power / day</span> ${fmtMoney(dailyPower(verdictPick.g))} <span class="text-zinc-600">(${verdictPick.g.tdp}W × ${(util()*100)|0}% × $${kwh().toFixed(3)}/kWh)</span></div>
<div><span class="text-zinc-500">GPU amort / day</span> ${fmtMoney(verdictPick.price/(years()*365))} <span class="text-zinc-600">(over ${years()}y)</span></div>
</div>` : ""}`;
// Show-the-math panel
if (bd) {
const dailyP = dailyPower(verdictPick.g);
const dailyA = verdictPick.price / (years() * 365);
const totalDaily = dailyP + dailyA;
const netDaily = bd.total - dailyP;
const netDailyTco = bd.total - totalDaily;
$("math-body").textContent = [
`Workload`,
` ${fmtTok(bd.out)} output tokens/day`,
` ${fmtTok(bd.inn)} input tokens/day (${ioRatio()}× ratio)`,
``,
`API spend / day`,
` input : ${fmtTok(bd.inn)} × $${bd.in_per_mtok.toFixed(2)}/Mtok = ${fmtMoney(bd.in_cost)}`,
` output : ${fmtTok(bd.out)} × $${bd.out_per_mtok.toFixed(2)}/Mtok = ${fmtMoney(bd.out_cost)}`,
` total : ${fmtMoney(bd.total)}/day`,
``,
`Local hardware (${verdictPick.g.name})`,
` GPU price : ${fmtMoney(verdictPick.price)} (${priceLabel(verdictPick.g)})`,
` TDP × util × kWh: ${verdictPick.g.tdp}W × ${(util()*100).toFixed(0)}% × 24h × $${kwh().toFixed(3)} = ${fmtMoney(dailyP)}/day power`,
` Amortized : ${fmtMoney(verdictPick.price)} ÷ (${years()}y × 365d) = ${fmtMoney(dailyA)}/day`,
` Total daily TCO : ${fmtMoney(totalDaily)}/day`,
``,
`Break-even (vs sticker price, electricity-aware)`,
` Net daily savings = API spend − power = ${fmtMoney(bd.total)} − ${fmtMoney(dailyP)} = ${fmtMoney(netDaily)}`,
` Payback = ${fmtMoney(verdictPick.price)} ÷ ${fmtMoney(netDaily)}/day = ${paybackPhrase(verdictPick.pb)}`,
``,
`Year-1 net (full TCO including amort)`,
` (API spend − power − amort) × 365 = ${fmtMoney(netDailyTco)}/day × 365 = ${fmtMoney(netDailyTco * 365)}/yr`,
``,
`TPS estimate`,
` Anchor: RTX 5070 Ti (896 GB/s) @ Qwen3.5-9B Q4_K_M = 110 tok/s observed`,
` Calibration: ${(ANCHOR_EFF*100).toFixed(0)}% of theoretical bandwidth-bound peak`,
` This GPU: ${verdictPick.g.bandwidth} GB/s ÷ (${m.params_active_b}B × ${currentQuant().bpp.toFixed(2)} B/param) × ${engineMult()}× engine × ${spec().toFixed(2)}× spec = ${verdictPick.tps.toFixed(0)} tok/s`,
` Real-world: ±25%. Memory-bandwidth-bound single-stream decode model.`,
].join("\n");
} else {
$("math-body").textContent = "(no API price for this model — can't compute API cost.)";
}
}
// ─────────── Ranked GPU table ───────────
const sortKey = S.sortGpu || "price";
const showFails = $("show-fails")?.checked;
const sortFn = {
price: (a,b) => a.price - b.price,
tps: (a,b) => b.tps - a.tps,
payback: (a,b) => a.pb - b.pb,
netyr: (a,b) => b.netyr - a.netyr,
}[sortKey] || ((a,b) => a.price - b.price);
const visibleRows = [...fitGpus.sort(sortFn), ...(showFails ? failGpus.sort((a,b) => a.g.vram - b.g.vram) : [])];
$("gpu-table-body").innerHTML = visibleRows.map(({g, f, tps, pb, dailySave, price, netyr}) => {
const isVerdict = verdictPick && g === verdictPick.g;
const v2 = f.ok ? classifyVerdict(pb) : null;
const overBudget = f.ok && price > S.budget;
if (f.ok) {
return `<tr class="row ${isVerdict ? 'bg-indigo-500/[.04]' : ''} cursor-pointer" data-gpu="${g.name}">
<td class="px-4 py-2.5"><div class="flex items-center gap-2"><span class="text-sm">${g.name}</span><span class="text-[10px] text-zinc-500 mono">${g.tier}</span></div></td>
<td class="px-4 py-2.5 text-right mono"><span data-edit-gpu-price="${g.name}" class="hover:text-indigo-300 underline decoration-dotted decoration-zinc-700 underline-offset-4 ${overBudget ? 'text-amber-300' : ''}" onclick="event.stopPropagation()">${fmtMoney(price)}</span>${S.priceOverrides[g.name] !== undefined ? ' <span class="text-[10px] text-indigo-300/80">·custom</span>' : ''}</td>
<td class="px-4 py-2.5 text-right mono text-zinc-400">${f.vram_used.toFixed(1)}/${g.vram}${f.downgraded ? ` <span class="pill bg-amber-500/15 text-amber-300/90 text-[9px]" title="Doesn't fit at ${S.kvQuant} KV; auto-fell-back to ${f.kv_used} KV cache to fit. Quality impact minimal.">${f.kv_used} KV</span>` : ''}</td>
<td class="px-4 py-2.5 text-right mono">${tps.toFixed(0)}</td>
<td class="px-4 py-2.5 text-right mono text-zinc-400">${isFinite(pb) ? fmtDays(pb) : "—"}</td>
<td class="px-4 py-2.5 text-right mono ${netyr > 0 ? 'text-emerald-300' : 'text-rose-300'}">${fmtMoney(netyr)}</td>
<td class="px-4 py-2.5 text-right"><span class="pill ${pillClass(overBudget ? "maybe" : v2.tier)}">${overBudget ? "OVER" : v2.label}</span></td>
</tr>`;
} else {
const fb = failBadge(f.reason);
return `<tr class="row opacity-50">
<td class="px-4 py-2.5"><span class="text-sm line-through decoration-zinc-600">${g.name}</span> <span class="text-[10px] text-zinc-500 mono">${g.tier}</span></td>
<td class="px-4 py-2.5 text-right mono text-zinc-500">${fmtMoney(price)}</td>
<td colspan="4" class="px-4 py-2.5 text-[11px] text-zinc-500 italic">${f.detail}</td>
<td class="px-4 py-2.5 text-right"><span class="pill ${fb.color}">${fb.label}</span></td>
</tr>`;
}
}).join("");
$("gpu-table-body").querySelectorAll("tr[data-gpu]").forEach(tr => tr.addEventListener("click", e => {
if (e.target.closest("[data-edit-gpu-price]")) return;
S.gpuName = tr.dataset.gpu;
const sel = $("gpu-select"); if (sel) sel.value = S.gpuName;
setTab("gpu");
}));
// Hint line
let hint = "";
const downgradedCount = fitGpus.filter(s => s.f.downgraded).length;
if (downgradedCount > 0) {
hint = `<span class="text-amber-300/80">${downgradedCount}</span> GPU${downgradedCount === 1 ? "" : "s"} fit by auto-falling-back to a smaller KV cache (annotated in VRAM column).`;
} else if (failGpus.length && !showFails) {
const reasons = [...new Set(failGpus.map(s => s.f.reason))];
if (reasons.includes("weights")) hint = `${failGpus.length} GPU${failGpus.length===1?"":"s"} don't fit at ${currentQuant().label}. Try Q4_K_S — may bring smaller cards into range.`;
else if (reasons.includes("kv")) hint = `Some GPUs blocked even at Q4 KV cache. Shorten the context to fit.`;
else if (reasons.includes("tps")) hint = `Some GPUs below your ${minTps()} t/s target. Lower the bar to see them.`;
else hint = `${failGpus.length} GPU${failGpus.length===1?"":"s"} don't fit. Toggle "show GPUs that don't fit" to see why.`;
}
$("hint-line").innerHTML = hint;
renderApiTable(m);
}
function renderApiTable(m) {
const rows = m.providers.slice(0, 8).map(p => `
<tr class="row border-t border-white/5">
<td class="px-4 py-2 text-sm">${p.provider}</td>
<td class="px-4 py-2 text-xs text-zinc-500">${p.quantization && p.quantization !== "unknown" ? p.quantization : "—"}</td>
<td class="px-4 py-2 text-right mono num">$${p.prompt_per_mtok.toFixed(2)}</td>
<td class="px-4 py-2 text-right mono num">$${p.completion_per_mtok.toFixed(2)}</td>
<td class="px-4 py-2 text-right mono text-zinc-500">${p.throughput_tps?.toFixed?.(0) ?? "—"}</td>
<td class="px-4 py-2 text-right mono text-zinc-500">${p.uptime_1d?.toFixed?.(1) ?? "—"}%</td>
</tr>`).join("");
const front = FRONTIER.map(f => `
<tr class="border-t border-white/5 bg-black/20">
<td class="px-4 py-2 text-xs text-zinc-400">${f.label}</td>
<td class="px-4 py-2 text-xs text-zinc-500 truncate max-w-[160px]">${f.model}</td>
<td class="px-4 py-2 text-right mono num text-zinc-400">$${f.prompt_per_mtok.toFixed(2)}</td>
<td class="px-4 py-2 text-right mono num text-zinc-400">$${f.completion_per_mtok.toFixed(2)}</td>
<td class="px-4 py-2"></td><td class="px-4 py-2"></td>
</tr>`).join("");
$("api-table").innerHTML = `
<table class="w-full text-sm">
<thead><tr class="text-left">
<th class="px-4 py-2">Provider</th><th class="px-4 py-2">Quant</th>
<th class="px-4 py-2 text-right">$/Mtok in</th><th class="px-4 py-2 text-right">$/Mtok out</th>
<th class="px-4 py-2 text-right">TPS</th><th class="px-4 py-2 text-right">Uptime 1d</th>
</tr></thead>
<tbody>${rows}${front}</tbody>
</table>`;
}
// ============================ GPU TAB
function setupGpuSelect() {
const sel = $("gpu-select");
sel.innerHTML = visibleGpus().map(g => `<option value="${g.name}">${g.name} · ${g.vram}GB · ${g.tier}</option>`).join("");
sel.value = S.gpuName;
sel.addEventListener("change", () => { S.gpuName = sel.value; renderGpuTab(); });
}
function renderGpuTab() {
const g = GPUS.find(x => x.name === S.gpuName) || GPUS[0];
// Sync headline price input
const hp = $("gpu-price-headline");
if (hp && document.activeElement !== hp) hp.value = gpuPrice(g);
$("gpu-price-context").textContent = `MSRP ${fmtMoney(g.msrp)} · street ${fmtMoney(g.street)}${S.priceOverrides[g.name] ? " · using your override" : ""}`;
$("gpu-summary").innerHTML = `
<div class="bg-zinc-950/40 rounded-lg p-3"><div class="text-[10px] uppercase tracking-wider text-zinc-500">VRAM</div><div class="text-base mono">${g.vram} GB</div></div>
<div class="bg-zinc-950/40 rounded-lg p-3"><div class="text-[10px] uppercase tracking-wider text-zinc-500">Bandwidth</div><div class="text-base mono">${g.bandwidth} GB/s</div></div>
<div class="bg-zinc-950/40 rounded-lg p-3"><div class="text-[10px] uppercase tracking-wider text-zinc-500">TDP</div><div class="text-base mono">${g.tdp} W</div></div>
<div class="bg-zinc-950/40 rounded-lg p-3"><div class="text-[10px] uppercase tracking-wider text-zinc-500">MSRP</div><div class="text-base mono">${fmtMoney(g.msrp)}</div></div>
<div class="bg-zinc-950/40 rounded-lg p-3"><div class="text-[10px] uppercase tracking-wider text-zinc-500">Street</div><div class="text-base mono">${fmtMoney(g.street)}</div></div>`;
// Best models for this GPU (open-weight, fits in VRAM with chosen context + KV)
const ctx = ctxTokens({context: 1e9});
const fitting = DATA
.filter(r => r.params_total_b && r.cheapest && r.open_weight)
.map(r => {
const f = fitCheck(r, g, {ctx});
if (!f.ok) return null;
const tps = tpsFor(r, g);
const pb = paybackDays(r, g);
const dailySave = dailyTcoSavings(r, g);
return { r, tps, pb, dailySave, vram_used: f.vram_used, kv_used: f.kv_used, downgraded: f.downgraded };
})
.filter(Boolean)
.sort((a, b) => b.dailySave - a.dailySave);
// Verdict pool: mainstream only (helper defined module-scope below)
const verdictPool = fitting.filter(x => isMainstream(x.r));
if (fitting.length === 0) {
$("verdict-gpu").className = "rounded-xl border-2 grad-border glow px-5 py-4 verdict-skip flex items-center gap-4 flex-wrap";
$("verdict-gpu").innerHTML = `
<div class="space-y-1"><div class="text-[10px] uppercase tracking-[.18em] text-rose-300/80">Nothing fits</div>
<div class="text-xl font-semibold">No open-weight models fit ${currentQuant().label} on this card</div></div>
<div class="text-[12px] text-zinc-400">Try a smaller quant or shorter context.</div>`;
} else {
const top = (verdictPool.length ? verdictPool : fitting)[0];
const v = classifyVerdict(top.pb);
const yrNet = top.dailySave * 365;
const accent = {buy:'text-emerald-300/80', maybe:'text-amber-300/80', info:'text-zinc-400', skip:'text-rose-300/80'}[v.tier];
$("verdict-gpu").className = `rounded-xl border-2 grad-border glow px-5 py-4 verdict-${v.tier} flex items-center justify-between gap-4 flex-wrap`;
$("verdict-gpu").innerHTML = `
<div class="flex items-baseline gap-3 flex-wrap">
<div class="text-[10px] uppercase tracking-[.18em] ${accent}">Best mainstream model</div>
<div class="text-2xl font-semibold tracking-tight">${top.r.name}</div>
<div class="text-[12px] text-zinc-400 mono">${top.r.params_total_b}B · ${top.tps.toFixed(0)} tok/s · ${top.vram_used.toFixed(1)}/${g.vram} GB</div>
</div>
<div class="text-right text-[12px] mono">
<div class="${yrNet > 0 ? 'text-emerald-300' : 'text-rose-300'} font-medium">${yrNet > 0 ? "saves" : "loses"} ${fmtMoney(Math.abs(yrNet))}/yr</div>
<div class="text-zinc-500">${paybackPhrase(top.pb)} · ${fitting.length} models fit</div>
</div>`;
}
// Models table — sortable + searchable, default newest first
const sortKey = $("gpu-model-sort")?.value || "created";
const search = ($("gpu-model-search")?.value || "").toLowerCase().trim();
const mainstreamCb = $("gpu-mainstream-only");
const mainstreamOnly = mainstreamCb ? mainstreamCb.checked : true;
const sortFn = {
created: (a,b) => (b.r.created || 0) - (a.r.created || 0),
tps: (a,b) => b.tps - a.tps,
dailySave: (a,b) => b.dailySave - a.dailySave,
payback: (a,b) => a.pb - b.pb,
params: (a,b) => b.r.params_total_b - a.r.params_total_b,
}[sortKey] || ((a,b) => (b.r.created || 0) - (a.r.created || 0));
let filtered = fitting;
if (mainstreamOnly) filtered = filtered.filter(x => isMainstream(x.r));
if (search) filtered = filtered.filter(x => (x.r.name||"").toLowerCase().includes(search) || x.r.id.toLowerCase().includes(search));
$("gpu-models").innerHTML = filtered.length === 0
? `<div class="p-6 text-sm text-zinc-500">No matching models. ${mainstreamOnly ? "Try unchecking 'mainstream only'." : "Try a different filter."}</div>`
: `<table class="w-full text-sm">
<thead><tr class="text-zinc-500 text-[10px] uppercase tracking-wider">
<th class="text-left px-4 py-2">Model</th>
<th class="text-right px-4 py-2">Params</th>
<th class="text-right px-4 py-2">VRAM used</th>
<th class="text-right px-4 py-2">TPS</th>
<th class="text-right px-4 py-2">API $/Mtok</th>
<th class="text-right px-4 py-2">Save/day</th>
<th class="text-right px-4 py-2">Payback</th>
<th class="text-right px-4 py-2">Verdict</th>
</tr></thead>
<tbody class="divide-y divide-white/5">
${filtered.sort(sortFn).slice(0, 30).map(({r, tps, pb, dailySave, vram_used, kv_used, downgraded}) => {
const v = classifyVerdict(pb);
const apiP = (apiPerToken(r) || 0) * 1e6;
const niche = !isMainstream(r);
const released = r.created ? new Date(r.created * 1000).toLocaleDateString(undefined, {year:"2-digit", month:"short"}) : "";
return `<tr class="row">
<td class="px-4 py-2.5"><div class="text-sm truncate max-w-[260px]">${r.name}${niche ? ' <span class="text-[10px] text-zinc-600">niche</span>' : ''}</div><div class="text-[10px] text-zinc-500 mono truncate max-w-[260px]">${r.id}${released ? ` · ${released}` : ""}</div></td>
<td class="px-4 py-2.5 text-right mono text-zinc-400">${r.params_total_b}B${r.params_active_b !== r.params_total_b ? `<span class="text-zinc-600 text-[10px]"> A${r.params_active_b}B</span>` : ""}</td>
<td class="px-4 py-2.5 text-right mono text-zinc-400">${vram_used.toFixed(1)}/${g.vram}${downgraded ? ` <span class="pill bg-amber-500/15 text-amber-300/90 text-[9px]" title="Auto-fell-back to ${kv_used} KV to fit.">${kv_used} KV</span>` : ''}</td>
<td class="px-4 py-2.5 text-right mono">${tps.toFixed(0)}</td>
<td class="px-4 py-2.5 text-right mono text-zinc-400">$${apiP.toFixed(2)}</td>
<td class="px-4 py-2.5 text-right mono ${dailySave > 0 ? 'text-emerald-300' : 'text-rose-300'}">${fmtMoney(dailySave)}</td>
<td class="px-4 py-2.5 text-right mono text-zinc-400">${isFinite(pb) ? fmtDays(pb) : "—"}</td>
<td class="px-4 py-2.5 text-right"><span class="pill ${pillClass(v.tier)}">${v.label}</span></td>
</tr>`;
}).join("")}
</tbody>
</table>`;
$("gpu-models-footer").textContent = `Showing ${Math.min(30, filtered.length)} of ${filtered.length}${mainstreamOnly ? ` mainstream` : ``} models that fit · sorted by ${sortKey}`;
// Bigger GPUs
const bigger = visibleGpus().filter(x => x.vram > g.vram).slice(0, 4);
$("bigger-gpus").innerHTML = bigger.map(b => {
const newModelsCount = DATA.filter(r => r.params_total_b && r.cheapest && r.open_weight && !fitCheck(r, g, {ctx}).ok && fitCheck(r, b, {ctx}).ok).length;
return `<div class="row px-4 py-3 flex items-center justify-between text-sm">
<div><div>${b.name}</div><div class="text-[11px] text-zinc-500 mono">${b.vram}GB · ${b.tier} · ${fmtMoney(gpuPrice(b))}</div></div>
<div class="text-xs mono text-zinc-400">+${newModelsCount} more models would fit</div>
</div>`;
}).join("") || `<div class="p-4 text-xs text-zinc-500">This is already a top-tier GPU.</div>`;
}
// ============================ BROWSE TAB (compact version of original)
function renderBrowse() {
const search = $("search").value.toLowerCase();
const openOnly = $("open-only").checked;
const vlim = parseInt($("vram").value);
const ctx = 8192;
const candidates = DATA
.filter(r => r.cheapest && r.params_total_b)
.filter(r => !openOnly || r.open_weight)
.filter(r => !search || (r.name||"").toLowerCase().includes(search) || r.id.toLowerCase().includes(search))
.map(r => {
const eligibleGpus = GPUS
.filter(g => g.vram <= vlim && fitCheck(r, g, {ctx}).ok)
.sort((a,b) => gpuPrice(a) - gpuPrice(b));
const gpu = eligibleGpus[0];
if (!gpu) return null;
const pb = paybackDays(r, gpu);
const dailySave = dailyTcoSavings(r, gpu);
return { r, gpu, pb, dailySave };
})
.filter(Boolean)
.sort((a,b) => a.pb - b.pb);
$("kpis").innerHTML = `
<div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
<div class="text-xs uppercase tracking-wider text-zinc-500">Models matching</div>
<div class="mt-1 text-2xl font-semibold">${candidates.length}</div>
</div>
<div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
<div class="text-xs uppercase tracking-wider text-zinc-500">Best payback</div>
<div class="mt-1 text-2xl font-semibold">${candidates[0] ? fmtDays(candidates[0].pb) : "—"}</div>
<div class="text-xs text-zinc-500 mt-1 truncate">${candidates[0] ? candidates[0].r.name + " on " + candidates[0].gpu.name : ""}</div>
</div>
<div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
<div class="text-xs uppercase tracking-wider text-zinc-500">Daily usage</div>
<div class="mt-1 text-2xl font-semibold mono">${fmtTok(tpd())}</div>
<div class="text-xs text-zinc-500 mt-1">tok/day</div>
</div>`;
$("model-table").innerHTML = candidates.slice(0, 50).map(c => {
const v = classifyVerdict(c.pb);
return `<details class="row">
<summary class="px-4 py-3 grid grid-cols-12 gap-3 items-center">
<div class="col-span-5 min-w-0">
<div class="text-sm truncate">${c.r.name}</div>
<div class="text-[11px] text-zinc-500 mono truncate">${c.r.id} · ${c.r.params_total_b}B · ${c.r.providers.length} providers</div>
</div>
<div class="col-span-2 text-xs mono"><div>${c.gpu.name}</div><div class="text-zinc-500">${fmtMoney(gpuPrice(c.gpu))}</div></div>
<div class="col-span-2 text-xs mono"><span class="text-zinc-500">save/d</span> <span class="${c.dailySave>0?'text-emerald-300':'text-rose-300'}">${fmtMoney(c.dailySave)}</span></div>
<div class="col-span-2 text-xs mono"><span class="text-zinc-500">payback</span> ${fmtDays(c.pb)}</div>
<div class="col-span-1 text-right"><span class="pill ${pillClass(v.tier)}">${v.label}</span></div>
</summary>
<div class="px-4 py-3 bg-black/20 text-xs">
<table class="w-full mono">
<thead><tr class="text-zinc-500"><th class="text-left py-1">Provider</th><th class="text-left py-1">Quant</th><th class="text-right py-1">$/M in</th><th class="text-right py-1">$/M out</th><th class="text-right py-1">TPS</th><th class="text-right py-1">Up 1d</th></tr></thead>
<tbody>${c.r.providers.map(p => `<tr><td class="py-0.5">${p.provider}</td><td class="text-zinc-500">${p.quantization || "?"}</td><td class="text-right">$${p.prompt_per_mtok.toFixed(2)}</td><td class="text-right">$${p.completion_per_mtok.toFixed(2)}</td><td class="text-right">${p.throughput_tps?.toFixed?.(0) ?? "—"}</td><td class="text-right">${p.uptime_1d?.toFixed?.(1) ?? "—"}%</td></tr>`).join("")}</tbody>
</table>
</div>
</details>`;
}).join("");
}
// ============================ wiring
function rerender() {
renderAllValueLabels();
if (S.tab === "model") renderModelTab();
else if (S.tab === "gpu") renderGpuTab();
else renderBrowse();
}
function bindToggleGroup(selector, key, onChange) {
document.querySelectorAll(selector).forEach(b => b.addEventListener("click", () => {
document.querySelectorAll(selector).forEach(x => x.dataset.active = x === b);
if (key && b.dataset[key]) S[key] = b.dataset[key];
if (onChange) onChange(b);
rerender();
}));
}
function setupTpdPresets(btnClass, sliderId) {
document.querySelectorAll(btnClass).forEach(b => b.addEventListener("click", () => {
document.querySelectorAll(btnClass).forEach(x => x.dataset.active = x === b);
$(sliderId).value = b.dataset.tpd;
// sync all three sliders so switching tabs feels coherent
$("tpd").value = b.dataset.tpd;
$("tpd2").value = b.dataset.tpd;
$("tpd3").value = b.dataset.tpd;
rerender();
}));
}
function setupCtxPresets(btnClass, sliderId, otherSliderId) {
document.querySelectorAll(btnClass).forEach(b => b.addEventListener("click", () => {
document.querySelectorAll(btnClass).forEach(x => x.dataset.active = x === b);
if (b.dataset.ctx === "max") {
// pick model max if available
const m = DATA.find(r => r.id === S.modelId);
const ctx = m && m.context ? m.context : 256000;
const exp = Math.log2(ctx);
$(sliderId).value = exp;
if (otherSliderId) $(otherSliderId).value = exp;
} else {
$(sliderId).value = b.dataset.ctx;
if (otherSliderId) $(otherSliderId).value = b.dataset.ctx;
}
rerender();
}));
}
// ============================ USAGE TAB
// Defensive parser: accepts whatever shape OpenRouter returns.
// Looks for an array with per-row {model, usage/cost, tokens, requests}.
function normalizeUsage(raw) {
if (!raw) return null;
// Accept our combined shape {analytics, credits, auth, fetched_at}
let analytics = raw.analytics || raw;
if (analytics?.data) analytics = analytics.data;
if (!Array.isArray(analytics)) {
// Search nested objects for first array
const found = findFirstArrayLike(raw);
if (found) analytics = found;
else return null;
}
const rows = analytics.map(r => {
const id = r.model_permaslug || r.model || r.endpoint || r.name || r.permaslug;
const cost = parseFloat(r.usage ?? r.cost ?? r.usage_in_credits ?? r.amount ?? 0);
const inTok = parseInt(r.prompt_tokens ?? r.input_tokens ?? r.tokens_prompt ?? 0);
const outTok = parseInt(r.completion_tokens ?? r.output_tokens ?? r.tokens_completion ?? 0);
const reqs = parseInt(r.requests ?? r.count ?? 0);
const date = r.date || r.day || r.created_at || null;
return id ? {id, cost, inTok, outTok, reqs, date} : null;
}).filter(Boolean);
// Aggregate by model
const byModel = new Map();
for (const r of rows) {
const cur = byModel.get(r.id) || {id: r.id, cost: 0, inTok: 0, outTok: 0, reqs: 0, days: new Set()};
cur.cost += r.cost; cur.inTok += r.inTok; cur.outTok += r.outTok; cur.reqs += r.reqs;
if (r.date) cur.days.add(r.date.slice(0,10));
byModel.set(r.id, cur);
}
const aggregated = [...byModel.values()].map(m => ({...m, days: m.days.size})).sort((a,b) => b.cost - a.cost);
const dates = rows.map(r => r.date).filter(Boolean).sort();
return {
rows: aggregated,
raw_rows: rows.length,
span: dates.length ? {from: dates[0]?.slice(0,10), to: dates[dates.length-1]?.slice(0,10)} : null,
credits: raw.credits?.data || raw.credits || null,
auth: raw.auth?.data || raw.auth || null,
fetched_at: raw.fetched_at || new Date().toISOString(),
};
}
function findFirstArrayLike(o, depth = 0) {
if (depth > 4 || !o) return null;
if (Array.isArray(o) && o.length && typeof o[0] === "object") return o;
if (typeof o === "object") {
for (const v of Object.values(o)) {
const f = findFirstArrayLike(v, depth + 1);
if (f) return f;
}
}
return null;
}
function persistUsage() { localStorage.setItem("or_usage_v1", JSON.stringify(S.usage)); }
async function fetchUsage(apiKey) {
const headers = {"Authorization": `Bearer ${apiKey}`, "Content-Type": "application/json"};
const j = async (path) => {
const r = await fetch(`https://openrouter.ai${path}`, {headers});
if (!r.ok) throw new Error(`${path}: HTTP ${r.status}`);
return r.json();
};
const [analytics, credits, auth] = await Promise.all([
j("/api/v1/analytics").catch(e => ({error: e.message})),
j("/api/v1/credits").catch(e => ({error: e.message})),
j("/api/v1/auth/key").catch(e => ({error: e.message})),
]);
return {analytics, credits, auth, fetched_at: new Date().toISOString()};
}
const BOOKMARKLET_JS = `javascript:(async()=>{try{const j=async p=>(await fetch(p,{credentials:'include'})).json();const [a,c,k]=await Promise.all([j('/api/v1/analytics'),j('/api/v1/credits'),j('/api/v1/auth/key')]);const blob=JSON.stringify({analytics:a,credits:c,auth:k,fetched_at:new Date().toISOString()});await navigator.clipboard.writeText(blob);alert('OpenRouter usage copied to clipboard ('+blob.length+' bytes). Paste it into the calculator.');}catch(e){alert('Failed: '+e.message);}})();`;
function loadUsageFromText(text) {
let parsed;
try { parsed = JSON.parse(text); } catch (e) { setUsageStatus("Invalid JSON: " + e.message, true); return false; }
const norm = normalizeUsage(parsed);
if (!norm || !norm.rows.length) { setUsageStatus("Couldn't find usage rows in that JSON.", true); return false; }
S.usage = norm;
persistUsage();
setUsageStatus(`Loaded ${norm.rows.length} models from ${norm.raw_rows} raw rows · ${norm.span ? `${norm.span.from} → ${norm.span.to}` : "no date span"}`);
renderUsageTab();
return true;
}
function setUsageStatus(msg, isError = false) {
const el = $("usage-status");
if (el) { el.textContent = msg; el.className = "text-xs " + (isError ? "text-rose-300" : "text-emerald-300"); }
}
function renderUsageTab() {
const u = S.usage;
const empty = $("usage-empty"), kpis = $("usage-kpis"), split = $("usage-split"), rowsCard = $("usage-rows");
if (!u) {
empty.classList.remove("hidden");
kpis.classList.add("hidden"); split.classList.add("hidden"); rowsCard.classList.add("hidden");
return;
}
empty.classList.add("hidden");
kpis.classList.remove("hidden"); split.classList.remove("hidden"); rowsCard.classList.remove("hidden");
// Match each usage row to DATA
const matched = u.rows.map(r => {
const m = DATA.find(d => d.id === r.id) || DATA.find(d => d.id.startsWith(r.id.split(":")[0]));
return {...r, model: m, open: m ? m.open_weight : null};
});
const totalCost = matched.reduce((s,r) => s + r.cost, 0);
const openRows = matched.filter(r => r.open === true);
const closedRows = matched.filter(r => r.open === false);
const unknownRows = matched.filter(r => r.open === null);
const openCost = openRows.reduce((s,r) => s + r.cost, 0);
const closedCost = closedRows.reduce((s,r) => s + r.cost, 0);
const unknownCost = unknownRows.reduce((s,r) => s + r.cost, 0);
const span = u.span ? `${u.span.from} → ${u.span.to}` : "all time";
const days = u.span ? Math.max(1, Math.round((new Date(u.span.to) - new Date(u.span.from)) / 86400000) + 1) : 30;
// KPIs
kpis.innerHTML = `
<div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
<div class="text-xs uppercase tracking-wider text-zinc-500">Total spend</div>
<div class="mt-1 text-2xl font-semibold mono">${fmtMoney(totalCost)}</div>
<div class="text-xs text-zinc-500 mt-1">${span} · ${days}d · ${matched.length} models</div>
</div>
<div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
<div class="text-xs uppercase tracking-wider text-zinc-500">Avg daily output tokens</div>
<div class="mt-1 text-2xl font-semibold mono">${fmtTok(openRows.reduce((s,r) => s + r.outTok, 0) / days + closedRows.reduce((s,r) => s + r.outTok, 0) / days)}</div>
<div class="text-xs text-zinc-500 mt-1">used to compute payback</div>
</div>
<div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
<div class="text-xs uppercase tracking-wider text-zinc-500">Snapshot</div>
<div class="mt-1 text-2xl font-semibold mono">${new Date(u.fetched_at).toLocaleDateString()}</div>
<div class="text-xs text-zinc-500 mt-1">refresh anytime</div>
</div>`;
// Open/closed split
const g = GPUS.find(x => x.name === S.gpuName) || GPUS[2];
const openTotalTokens = openRows.reduce((s,r) => s + r.outTok + r.inTok/Math.max(1,ioRatio()), 0);
const openDailyTokens = openRows.reduce((s,r) => s + r.outTok, 0) / days;
// Hypothetical open cost on local: (open_daily_tokens × days) — but we can't run closed locally, so closed stays as-is.
const localOpenDailyCost = (gpuPrice(g) / (years() * 365)) + dailyPower(g);
const localOpenTotalCost = localOpenDailyCost * days;
const openPayback = openCost / days > 0 ? gpuPrice(g) / (openCost / days - dailyPower(g)) : Infinity;
split.innerHTML = `
<div class="rounded-2xl border-2 grad-border glow p-5 verdict-${openCost > localOpenTotalCost ? 'buy' : 'maybe'}">
<div class="text-[11px] uppercase tracking-[.18em] ${openCost > localOpenTotalCost ? 'text-emerald-300/80' : 'text-amber-300/80'}">Open-weight · could move local</div>
<div class="text-2xl font-semibold mt-1 mono">${fmtMoney(openCost)}</div>
<div class="text-xs text-zinc-400 mt-2">${openRows.length} models · ${fmtTok(openRows.reduce((s,r) => s + r.outTok, 0))} output tokens</div>
<div class="text-[11px] text-zinc-500 mt-3 leading-relaxed">
At ${days}-day rate, your <span class="text-zinc-300">${g.name}</span> (${fmtMoney(gpuPrice(g))}) would cost <span class="text-zinc-300">${fmtMoney(localOpenTotalCost)}</span> in TCO — break-even <span class="text-zinc-300">${fmtDays(openPayback)}</span>.
</div>
</div>
<div class="rounded-2xl bg-zinc-900/40 grad-border glow p-5">
<div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Closed · stays on API</div>
<div class="text-2xl font-semibold mt-1 mono">${fmtMoney(closedCost)}</div>
<div class="text-xs text-zinc-400 mt-2">${closedRows.length} closed-weight models${unknownRows.length ? ` · ${unknownRows.length} unknown ($${unknownCost.toFixed(2)})` : ""}</div>
<div class="text-[11px] text-zinc-500 mt-3 leading-relaxed">No local equivalent without quality trade-off — this cost stays regardless of which GPU you buy.</div>
</div>`;
// Per-model rows
$("usage-gpu-pick").textContent = `local cost computed for ${g.name} (change in 'Evaluate a GPU' tab)`;
const sorted = matched.sort((a,b) => b.cost - a.cost).slice(0, 30);
$("usage-rows-body").innerHTML = sorted.map(r => {
const dailyOut = r.outTok / days;
const dailyIn = r.inTok / days;
const rowDailyTotal = dailyOut + dailyIn;
let rightCol;
if (r.open === true && r.model && r.model.params_total_b) {
const fit = fitCheck(r.model, g, {ctx: 8192});
if (fit.ok) {
const tps = tpsFor(r.model, g);
// Hypothetical local cost share of GPU TCO, prorated by tokens
const dailyLocal = (gpuPrice(g) / (years() * 365)) + dailyPower(g);
const dailyApi = r.cost / days;
const saving = dailyApi - dailyLocal;
const yrSaving = saving * 365;
const v = classifyVerdict(saving > 0 ? gpuPrice(g) / (saving + dailyPower(g)) : Infinity);
rightCol = `
<div class="text-xs mono"><span class="text-zinc-500">local TCO/d</span> ${fmtMoney(dailyLocal)}</div>
<div class="text-xs mono"><span class="text-zinc-500">save/d</span> <span class="${saving>0?'text-emerald-300':'text-rose-300'}">${fmtMoney(saving)}</span></div>
<div class="text-xs mono"><span class="text-zinc-500">tps</span> ${tps.toFixed(0)}</div>
<div class="text-right"><span class="pill ${pillClass(v.tier)}">${v.label}</span></div>`;
} else {
rightCol = `<div class="col-span-4 text-xs text-amber-300/80">${fit.detail}</div>`;
}
} else if (r.open === false) {
rightCol = `<div class="col-span-4 text-xs text-zinc-500">closed · stays on API</div>`;
} else {
rightCol = `<div class="col-span-4 text-xs text-zinc-500">unknown model · not in catalog</div>`;
}
return `<div class="row px-4 py-3 grid grid-cols-12 gap-2 items-center">
<div class="col-span-5 min-w-0">
<div class="text-sm truncate">${r.model?.name || r.id}</div>
<div class="text-[11px] text-zinc-500 mono truncate">${r.id}${r.open === true ? " · open" : r.open === false ? " · closed" : ""}</div>
</div>
<div class="col-span-3 text-xs mono">
<div><span class="text-zinc-500">spent</span> ${fmtMoney(r.cost)}</div>
<div class="text-zinc-500">${fmtTok(r.outTok)} out · ${fmtTok(r.inTok)} in${r.reqs ? " · " + r.reqs + " reqs" : ""}</div>
</div>
${rightCol}
</div>`;
}).join("");
}
function init() {
// Engine dropdown
const sel = $("engine");
sel.innerHTML = ENGINES.engines.map(e => `<option value="${e.name}">${e.name} · ${e.single_stream}× / ${e.batched}× batched</option>`).join("");
sel.value = "llama.cpp";
sel.addEventListener("change", () => { S.engine = sel.value; rerender(); });
// Quant dropdown
const qsel = $("quant");
qsel.innerHTML = QUANTS.map(q => `<option value="${q.id}">${q.label} · ${q.format}</option>`).join("");
qsel.value = "Q4_K_M";
qsel.addEventListener("change", () => { S.quant = qsel.value; rerender(); });
// Tabs
document.querySelectorAll(".tab-btn").forEach(b => b.addEventListener("click", () => setTab(b.dataset.tab)));
// Toggle groups
bindToggleGroup(".mode-btn", "mode");
bindToggleGroup(".price-btn", "price");
bindToggleGroup(".conc-btn", "conc");
bindToggleGroup(".kv-btn", "kvQuant", b => { S.kvQuant = b.dataset.kv; localStorage.setItem("kv_quant", S.kvQuant); });
// Sliders
["tpd","tpd2","tpd3","io","years","util","kwh","spec","vram","search","open-only","ctx","ctx2","mintps"].forEach(id => {
const el = $(id);
if (!el) return;
el.addEventListener("input", () => {
if (id.startsWith("tpd")) {
$("tpd").value = $("tpd2").value = $("tpd3").value = el.value;
document.querySelectorAll(".preset-btn,.preset-btn2").forEach(b => b.dataset.active = false);
}
if (id === "ctx" || id === "ctx2") {
$("ctx").value = $("ctx2").value = el.value;
document.querySelectorAll(".ctx-btn,.ctx-btn2").forEach(b => b.dataset.active = false);
}
rerender();
});
});
setupTpdPresets(".preset-btn", "tpd");
setupTpdPresets(".preset-btn2", "tpd2");
setupCtxPresets(".ctx-btn", "ctx", "ctx2");
setupCtxPresets(".ctx-btn2", "ctx2", "ctx");
// GPU tab headline price input
$("gpu-price-headline").addEventListener("input", e => {
const v = parseFloat(e.target.value);
if (v > 0) S.priceOverrides[S.gpuName] = v; else delete S.priceOverrides[S.gpuName];
persistOverrides();
rerender();
});
document.querySelectorAll(".gpu-priceset-btn").forEach(b => b.addEventListener("click", () => {
const g = GPUS.find(x => x.name === S.gpuName);
if (!g) return;
const action = b.dataset.priceset;
if (action === "reset") delete S.priceOverrides[g.name];
else S.priceOverrides[g.name] = g[action];
persistOverrides();
rerender();
}));
// "tune ↓" → open the tune drawer and scroll into view
$("open-tune")?.addEventListener("click", () => {
const d = document.querySelector("details > summary > span");
const dd = d?.closest("details");
if (dd) { dd.open = true; dd.scrollIntoView({behavior: "smooth", block: "center"}); }
});
// GPU table sort headers
document.querySelectorAll(".sort-th").forEach(th => th.addEventListener("click", () => {
S.sortGpu = th.dataset.sort;
rerender();
}));
// Show GPUs that don't fit
$("show-fails")?.addEventListener("change", rerender);
// GPU-tab model filters
["gpu-model-search", "gpu-model-sort", "gpu-mainstream-only"].forEach(id => {
$(id)?.addEventListener("input", rerender);
$(id)?.addEventListener("change", rerender);
});
// Include data-center toggle
const dcInput = $("include-dc");
if (dcInput) {
dcInput.checked = S.includeDC;
dcInput.addEventListener("change", () => {
S.includeDC = dcInput.checked;
persistDC();
// Also rebuild the GPU dropdown to reflect new visible set
const sel = $("gpu-select");
if (sel) {
sel.innerHTML = visibleGpus().map(g => `<option value="${g.name}">${g.name} · ${g.vram}GB · ${g.tier}</option>`).join("");
if (!visibleGpus().find(g => g.name === S.gpuName)) S.gpuName = visibleGpus()[2]?.name || visibleGpus()[0]?.name;
sel.value = S.gpuName;
}
rerender();
});
}
// Budget input + presets
const budgetInput = $("budget");
budgetInput.value = S.budget;
budgetInput.addEventListener("input", () => {
const v = parseFloat(budgetInput.value);
if (v > 0) { S.budget = v; persistBudget(); }
document.querySelectorAll(".budget-btn").forEach(b => b.dataset.active = parseFloat(b.dataset.budget) === S.budget);
rerender();
});
document.querySelectorAll(".budget-btn").forEach(b => b.addEventListener("click", () => {
document.querySelectorAll(".budget-btn").forEach(x => x.dataset.active = x === b);
S.budget = parseFloat(b.dataset.budget);
persistBudget();
budgetInput.value = S.budget;
rerender();
}));
setupModelCombobox();
setupGpuSelect();
// ── Usage tab wiring ──
// Sub-tabs
document.querySelectorAll(".utab-btn").forEach(b => b.addEventListener("click", () => {
document.querySelectorAll(".utab-btn").forEach(x => x.dataset.active = x === b);
document.querySelectorAll("[data-upane]").forEach(p => p.classList.toggle("hidden", p.dataset.upane !== b.dataset.utab));
}));
// ① Paste / drop
$("copy-curl").addEventListener("click", async () => {
await navigator.clipboard.writeText($("curl-snippet").textContent);
setUsageStatus("curl command copied to clipboard.");
});
$("usage-load").addEventListener("click", () => loadUsageFromText($("usage-paste").value));
$("usage-pickfile").addEventListener("click", () => $("usage-file").click());
$("usage-file").addEventListener("change", async e => {
const f = e.target.files?.[0]; if (!f) return;
const text = await f.text();
if (loadUsageFromText(text)) $("usage-paste").value = text.slice(0, 5000);
});
// Drop zone — entire usage card
document.querySelector('[data-pane="usage"]').addEventListener("dragover", e => { e.preventDefault(); });
document.querySelector('[data-pane="usage"]').addEventListener("drop", async e => {
e.preventDefault();
const f = e.dataTransfer?.files?.[0]; if (!f) return;
loadUsageFromText(await f.text());
});
// ② Connect
const savedKey = localStorage.getItem("or_api_key") || "";
if (savedKey) $("api-key").value = savedKey;
$("api-fetch").addEventListener("click", async () => {
const key = $("api-key").value.trim();
if (!key) { setUsageStatus("Enter an API key first.", true); return; }
localStorage.setItem("or_api_key", key);
setUsageStatus("Fetching from openrouter.ai…");
$("api-status").textContent = "Calling /credits, /auth/key, /analytics …";
try {
const raw = await fetchUsage(key);
$("api-status").textContent = `analytics: ${raw.analytics?.error ? "❌ "+raw.analytics.error : "✓"} · credits: ${raw.credits?.error ? "❌" : "✓"} · auth: ${raw.auth?.error ? "❌" : "✓"}`;
const norm = normalizeUsage(raw);
if (!norm || !norm.rows.length) { setUsageStatus("Fetched, but no usage rows found in response.", true); return; }
S.usage = norm; persistUsage();
setUsageStatus(`Loaded ${norm.rows.length} models from your account.`);
renderUsageTab();
} catch (e) { setUsageStatus("Fetch failed: " + e.message, true); }
});
$("api-clear").addEventListener("click", () => {
localStorage.removeItem("or_api_key");
$("api-key").value = "";
setUsageStatus("Key cleared from this browser.");
});
// ③ Bookmarklet
$("bookmarklet").href = BOOKMARKLET_JS;
$("bm-load").addEventListener("click", () => loadUsageFromText($("bm-paste").value));
// Delegated handlers for inline-editable inputs (re-rendered on every state change)
document.addEventListener("input", e => {
const t = e.target;
if (t.matches?.("[data-input-gpu-price]")) {
const name = t.dataset.inputGpuPrice;
const v = parseFloat(t.value);
if (v > 0) S.priceOverrides[name] = v; else delete S.priceOverrides[name];
persistOverrides();
rerender();
} else if (t.matches?.("[data-input-kwh]")) { $("kwh").value = parseFloat(t.value) || 0; rerender();
} else if (t.matches?.("[data-input-util]")) { $("util").value = (parseFloat(t.value) || 0) / 100; rerender();
} else if (t.matches?.("[data-input-years]")) { $("years").value = parseFloat(t.value) || 1; rerender();
} else if (t.matches?.("[data-input-tpd]")) {
const exp = Math.log10(Math.max(1, parseFloat(t.value) || 1));
$("tpd").value = $("tpd2").value = $("tpd3").value = exp;
rerender();
}
});
// click-to-edit on inline price spans (verdict + cards)
document.addEventListener("click", e => {
const t = e.target.closest("[data-edit-gpu-price]");
if (!t || t.tagName === "INPUT") return;
const name = t.dataset.editGpuPrice;
const cur = S.priceOverrides[name] ?? GPUS.find(g => g.name === name)?.[S.price] ?? 0;
const v = prompt(`Set custom price for ${name} (USD). Leave blank to reset.`, cur);
if (v === null) return;
if (v.trim() === "") delete S.priceOverrides[name];
else { const n = parseFloat(v); if (n > 0) S.priceOverrides[name] = n; }
persistOverrides();
rerender();
});
// reset button for individual GPU
document.addEventListener("click", e => {
const t = e.target.closest("[data-clear-price]");
if (!t) return;
delete S.priceOverrides[t.dataset.clearPrice];
persistOverrides();
rerender();
});
// "Already own a GPU" jump
document.addEventListener("click", e => {
if (e.target.closest("#already-own-btn")) setTab("gpu");
});
renderAllValueLabels();
setTab("model");
}
init();
</script>
</body></html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment