Created
May 5, 2026 20:18
-
-
Save smartexpert/e18e91530034b306d4a81bd7fa58e7d0 to your computer and use it in GitHub Desktop.
OpenRouter × GPU Break-even Calculator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!doctype html> | |
| <html lang="en"><head> | |
| <meta charset="utf-8" /> | |
| <meta name="viewport" content="width=device-width,initial-scale=1" /> | |
| <title>OpenRouter × GPU Breakeven</title> | |
| <script src="https://cdn.tailwindcss.com"></script> | |
| <style> | |
| :root { color-scheme: dark; } | |
| html, body { background: #08090a; min-height: 100%; } | |
| body { font-family: 'Inter', ui-sans-serif, system-ui, -apple-system, 'Segoe UI', Roboto, sans-serif; -webkit-font-smoothing: antialiased; } | |
| .mono { font-family: 'JetBrains Mono', ui-monospace, SFMono-Regular, Menlo, Monaco, monospace; } | |
| .glow { box-shadow: 0 0 0 1px rgba(255,255,255,.04), 0 1px 0 rgba(255,255,255,.02) inset; } | |
| .grad-border { position: relative; } | |
| .grad-border::before { content:""; position:absolute; inset:0; padding:1px; border-radius:inherit; | |
| background: linear-gradient(135deg, rgba(118,131,255,.45), rgba(118,131,255,0) 40%, rgba(255,255,255,.04)); | |
| -webkit-mask: linear-gradient(#000 0 0) content-box, linear-gradient(#000 0 0); -webkit-mask-composite: xor; mask-composite: exclude; pointer-events:none;} | |
| .verdict-buy { background: linear-gradient(135deg, rgba(34,197,94,.18), rgba(34,197,94,.04)); border-color: rgba(34,197,94,.4); } | |
| .verdict-maybe { background: linear-gradient(135deg, rgba(234,179,8,.16), rgba(234,179,8,.03)); border-color: rgba(234,179,8,.4); } | |
| .verdict-info { background: linear-gradient(135deg, rgba(120,120,140,.14), rgba(120,120,140,.03)); border-color: rgba(255,255,255,.10); } | |
| .verdict-skip { background: linear-gradient(135deg, rgba(244,63,94,.16), rgba(244,63,94,.03)); border-color: rgba(244,63,94,.4); } | |
| .pill { display:inline-flex; align-items:center; gap:.25rem; padding:.125rem .55rem; border-radius:9999px; font-size:.7rem; font-weight:500; } | |
| input[type=range] { accent-color: #7683ff; } | |
| details > summary { list-style: none; cursor: pointer; } | |
| details > summary::-webkit-details-marker { display: none; } | |
| th { font-weight: 500; color: #a1a1aa; font-size: .68rem; text-transform: uppercase; letter-spacing: .04em; } | |
| .num { font-variant-numeric: tabular-nums; } | |
| .row:hover { background: rgba(255,255,255,.025); } | |
| .ghost-input { background: rgba(0,0,0,.35); border: 1px solid rgba(255,255,255,.06); } | |
| .ghost-input:focus-within { border-color: rgba(118,131,255,.45); } | |
| .preset-btn { transition: all .15s; } | |
| .preset-btn[data-active="true"] { background: rgba(118,131,255,.18); color: #c7cfff; border-color: rgba(118,131,255,.5); } | |
| .tab-btn[data-active="true"] { background: rgba(255,255,255,.06); color: #fff; } | |
| .combobox-pop { box-shadow: 0 12px 40px rgba(0,0,0,.55); } | |
| </style> | |
| </head> | |
| <body class="text-zinc-200"> | |
| <header class="border-b border-white/5"> | |
| <div class="max-w-[1180px] mx-auto px-6 py-5 flex items-center justify-between"> | |
| <div class="flex items-center gap-3"> | |
| <div class="h-8 w-8 rounded-lg grad-border glow flex items-center justify-center mono text-xs">≷</div> | |
| <div> | |
| <div class="text-sm font-medium tracking-tight">OpenRouter × GPU Breakeven</div> | |
| <div class="text-[11px] text-zinc-500">2026-05-05 20:13 UTC · 374 models · 214 open-weight · anchor RTX 5070 Ti @ 110 t/s on Qwen3.5-9B Q4_K_M</div> | |
| </div> | |
| </div> | |
| <div class="flex items-center gap-1 bg-zinc-900/60 grad-border glow rounded-lg p-1 text-xs"> | |
| <button data-tab="model" data-active="true" class="tab-btn rounded-md px-3 py-1.5 text-zinc-400">Pick a model</button> | |
| <button data-tab="gpu" class="tab-btn rounded-md px-3 py-1.5 text-zinc-400">Evaluate a GPU</button> | |
| <button data-tab="usage" class="tab-btn rounded-md px-3 py-1.5 text-zinc-400">My usage</button> | |
| <button data-tab="browse" class="tab-btn rounded-md px-3 py-1.5 text-zinc-400">Browse all</button> | |
| </div> | |
| </div> | |
| </header> | |
| <main class="max-w-[1180px] mx-auto px-6 py-8 space-y-6"> | |
| <!-- ============================================================ MODEL TAB --> | |
| <section data-pane="model" class="space-y-6"> | |
| <div class="rounded-2xl bg-zinc-900/40 grad-border glow p-6 space-y-6"> | |
| <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">I want to run</div> | |
| <div class="combobox" data-target="model-combo"> | |
| <div class="ghost-input rounded-xl px-4 py-3 flex items-center gap-3 cursor-text"> | |
| <span class="text-zinc-500 text-xs mono">model</span> | |
| <input id="model-search" type="text" placeholder="qwen 3.5 9b…" autocomplete="off" | |
| class="bg-transparent flex-1 text-lg font-medium outline-none mono"> | |
| <span id="model-meta" class="text-xs text-zinc-500"></span> | |
| </div> | |
| <div id="model-results" class="combobox-pop hidden mt-2 rounded-xl bg-zinc-950/95 grad-border max-h-72 overflow-y-auto"></div> | |
| </div> | |
| <div class="space-y-3"> | |
| <div class="flex items-center justify-between"> | |
| <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Daily usage</div> | |
| <div class="text-sm mono text-zinc-300"><span id="tpd-val"></span> output tok/day</div> | |
| </div> | |
| <input id="tpd" type="range" min="3.5" max="10" step="0.05" value="6" class="w-full"> | |
| <div class="flex flex-wrap gap-1.5 text-[11px] mono"> | |
| <button data-tpd="4" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🐢 casual · 10K</button> | |
| <button data-tpd="5" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">💬 power · 100K</button> | |
| <button data-tpd="6" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">⚡ heavy dev · 1M</button> | |
| <button data-tpd="7" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🚀 small team · 10M</button> | |
| <button data-tpd="8" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🏢 prod · 100M</button> | |
| <button data-tpd="9" class="preset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🌐 scale · 1B</button> | |
| </div> | |
| </div> | |
| <div class="space-y-3 pt-2"> | |
| <div class="flex items-center justify-between"> | |
| <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Context to load</div> | |
| <div class="text-sm mono text-zinc-300"><span id="ctx-val"></span> tokens</div> | |
| </div> | |
| <input id="ctx" type="range" min="10" max="20.5" step="0.05" value="13" class="w-full"> | |
| <div class="flex flex-wrap gap-1.5 text-[11px] mono"> | |
| <button data-ctx="12" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">4K</button> | |
| <button data-ctx="13" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">8K</button> | |
| <button data-ctx="15" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">32K</button> | |
| <button data-ctx="17" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">128K</button> | |
| <button data-ctx="18" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">256K</button> | |
| <button data-ctx="max" class="ctx-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">model max</button> | |
| </div> | |
| <div id="ctx-warn" class="text-[11px] text-amber-300/80 hidden">Exceeds this model's max context</div> | |
| </div> | |
| <div class="space-y-3 pt-2"> | |
| <div class="flex items-center justify-between"> | |
| <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">My budget</div> | |
| <div class="text-[11px] text-zinc-500"><span id="budget-fits-count">—</span> GPUs in range</div> | |
| </div> | |
| <div class="ghost-input rounded-xl px-4 py-3 flex items-center gap-3"> | |
| <span class="text-zinc-500 text-sm">$</span> | |
| <input id="budget" type="number" min="100" step="100" value="1000" | |
| class="bg-transparent flex-1 text-2xl font-semibold mono outline-none"> | |
| <span class="text-xs text-zinc-500">USD</span> | |
| </div> | |
| <div class="flex flex-wrap gap-1.5 text-[11px] mono"> | |
| <button data-budget="500" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🪙 ≤$500</button> | |
| <button data-budget="1000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">💵 ≤$1K</button> | |
| <button data-budget="2000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">💼 ≤$2K</button> | |
| <button data-budget="3000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🎯 ≤$3K</button> | |
| <button data-budget="10000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">⚙️ workstation ≤$10K</button> | |
| <button data-budget="40000" class="budget-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🏢 datacenter ≤$40K</button> | |
| </div> | |
| <label class="flex items-center gap-2 text-[11px] text-zinc-400 pt-1 cursor-pointer select-none"> | |
| <input id="include-dc" type="checkbox" class="accent-indigo-400"> | |
| <span>Include data-center GPUs (H100, A100, B200…)</span> | |
| <span class="text-zinc-600">— off by default</span> | |
| </label> | |
| <div class="pt-2 border-t border-white/5 flex items-center justify-between gap-3 text-[11px]"> | |
| <div id="settings-strip" class="mono text-zinc-400 truncate">…</div> | |
| <button id="open-tune" class="text-zinc-400 hover:text-indigo-300 underline decoration-dotted underline-offset-4 shrink-0">tune ↓</button> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Verdict strip (single row) --> | |
| <div id="verdict-model" class="rounded-xl border-2 grad-border glow px-5 py-4"></div> | |
| <details id="math-panel" class="rounded-xl bg-zinc-900/30 grad-border glow px-5 py-3"> | |
| <summary class="text-[11px] text-zinc-400 hover:text-zinc-200 select-none flex items-center justify-between"> | |
| <span>▸ Show the math · what's actually being computed</span> | |
| <span class="text-zinc-600">click to expand</span> | |
| </summary> | |
| <pre id="math-body" class="mono text-[11px] text-zinc-300 leading-relaxed mt-3 whitespace-pre-wrap"></pre> | |
| </details> | |
| <!-- Ranked GPU answer table --> | |
| <div class="rounded-xl bg-zinc-900/30 grad-border glow overflow-hidden"> | |
| <div class="px-5 py-3 border-b border-white/5 flex items-center justify-between"> | |
| <div class="text-sm font-medium">All GPUs ranked by price</div> | |
| <div class="text-[11px] text-zinc-500" id="gpu-count">—</div> | |
| </div> | |
| <div class="overflow-x-auto"><table class="w-full text-sm"> | |
| <thead><tr> | |
| <th class="text-left px-4 py-2 text-zinc-500">GPU</th> | |
| <th data-sort="price" class="sort-th text-right px-4 py-2 cursor-pointer hover:text-zinc-300">Price</th> | |
| <th class="text-right px-4 py-2">VRAM used</th> | |
| <th data-sort="tps" title="Estimated single-stream output tokens/sec. Anchor: RTX 5070 Ti @ 110 t/s on Qwen3.5-9B Q4_K_M. Scaled by bandwidth ÷ active-param-bytes × engine multiplier. ±25% in practice." class="sort-th text-right px-4 py-2 cursor-pointer hover:text-zinc-300">TPS ⓘ</th> | |
| <th data-sort="payback" title="Days until net API savings (after electricity) recoup the GPU sticker price. Excludes amortization." class="sort-th text-right px-4 py-2 cursor-pointer hover:text-zinc-300">Payback ⓘ</th> | |
| <th data-sort="netyr" title="Year-1 P&L while owning. (API spend avoided − electricity − GPU amortized over N years) × 365." class="sort-th text-right px-4 py-2 cursor-pointer hover:text-zinc-300">Year-1 net ⓘ</th> | |
| <th class="text-right px-4 py-2">Verdict</th> | |
| </tr></thead> | |
| <tbody id="gpu-table-body" class="divide-y divide-white/5"></tbody> | |
| </table></div> | |
| <div class="px-5 py-2 border-t border-white/5 flex items-center justify-between text-[11px] text-zinc-500"> | |
| <div id="hint-line"></div> | |
| <label id="show-fails-label" class="flex items-center gap-2 cursor-pointer select-none"><input id="show-fails" type="checkbox" class="accent-indigo-400"><span id="show-fails-text">show GPUs that don't fit</span></label> | |
| </div> | |
| </div> | |
| <!-- API alternative reference --> | |
| <div class="rounded-2xl bg-zinc-900/30 grad-border glow overflow-hidden"> | |
| <div class="px-5 py-3 border-b border-white/5 flex items-center justify-between"> | |
| <div class="text-sm font-medium">If you stayed on the API</div> | |
| <div class="text-[11px] text-zinc-500">cheapest provider for this model · plus closed-frontier reference</div> | |
| </div> | |
| <div id="api-table"></div> | |
| </div> | |
| </section> | |
| <!-- ============================================================ GPU TAB --> | |
| <section data-pane="gpu" class="space-y-6 hidden"> | |
| <div class="rounded-2xl bg-zinc-900/40 grad-border glow p-6 space-y-5"> | |
| <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">I'm considering</div> | |
| <select id="gpu-select" class="ghost-input rounded-xl px-4 py-3 text-lg font-medium outline-none w-full mono"></select> | |
| <div class="space-y-2"> | |
| <div class="flex items-center justify-between"> | |
| <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">at the price I can pay</div> | |
| <div class="text-[11px] text-zinc-500" id="gpu-price-context"></div> | |
| </div> | |
| <div class="ghost-input rounded-xl px-4 py-3 flex items-center gap-3"> | |
| <span class="text-zinc-500 text-sm">$</span> | |
| <input id="gpu-price-headline" type="number" min="0" step="50" | |
| class="bg-transparent flex-1 text-2xl font-semibold mono outline-none"> | |
| <span class="text-xs text-zinc-500">USD</span> | |
| </div> | |
| <div class="flex flex-wrap gap-1.5 text-[11px] mono"> | |
| <button data-priceset="msrp" class="gpu-priceset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">use MSRP</button> | |
| <button data-priceset="street" class="gpu-priceset-btn rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">use street</button> | |
| <button data-priceset="reset" class="gpu-priceset-btn rounded-full border border-white/10 px-2.5 py-1 text-rose-300/70">reset override</button> | |
| </div> | |
| </div> | |
| <div id="gpu-summary" class="grid grid-cols-2 md:grid-cols-5 gap-3 text-xs"></div> | |
| <div class="space-y-3 pt-2"> | |
| <div class="flex items-center justify-between"> | |
| <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Daily usage</div> | |
| <div class="text-sm mono text-zinc-300"><span id="tpd-val2"></span> output tok/day</div> | |
| </div> | |
| <input id="tpd2" type="range" min="3.5" max="10" step="0.05" value="6" class="w-full"> | |
| <div class="flex flex-wrap gap-1.5 text-[11px] mono"> | |
| <button data-tpd="4" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🐢 casual · 10K</button> | |
| <button data-tpd="5" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">💬 power · 100K</button> | |
| <button data-tpd="6" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">⚡ heavy dev · 1M</button> | |
| <button data-tpd="7" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🚀 small team · 10M</button> | |
| <button data-tpd="8" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🏢 prod · 100M</button> | |
| <button data-tpd="9" class="preset-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">🌐 scale · 1B</button> | |
| </div> | |
| </div> | |
| <div class="space-y-3 pt-2"> | |
| <div class="flex items-center justify-between"> | |
| <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Context per request</div> | |
| <div class="text-sm mono text-zinc-300"><span id="ctx-val2"></span> tokens</div> | |
| </div> | |
| <input id="ctx2" type="range" min="10" max="20.5" step="0.05" value="13" class="w-full"> | |
| <div class="flex flex-wrap gap-1.5 text-[11px] mono"> | |
| <button data-ctx="12" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">4K</button> | |
| <button data-ctx="13" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400" data-active="true">8K</button> | |
| <button data-ctx="15" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">32K</button> | |
| <button data-ctx="17" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">128K</button> | |
| <button data-ctx="18" class="ctx-btn2 rounded-full border border-white/10 px-2.5 py-1 text-zinc-400">256K</button> | |
| </div> | |
| </div> | |
| </div> | |
| <div id="verdict-gpu" class="rounded-2xl border-2 grad-border glow p-6 space-y-3"></div> | |
| <div class="rounded-2xl bg-zinc-900/30 grad-border glow overflow-hidden"> | |
| <div class="px-5 py-3 border-b border-white/5 flex items-center justify-between gap-3 flex-wrap"> | |
| <div class="text-sm font-medium shrink-0">Models that fit</div> | |
| <div class="flex items-center gap-2"> | |
| <input id="gpu-model-search" type="text" placeholder="filter…" class="ghost-input rounded-md px-2 py-1 mono text-[11px] w-32"> | |
| <select id="gpu-model-sort" class="ghost-input rounded-md px-2 py-1 mono text-[11px]"> | |
| <option value="created">Newest</option> | |
| <option value="tps">Fastest TPS</option> | |
| <option value="dailySave">Highest savings</option> | |
| <option value="payback">Best payback</option> | |
| <option value="params">Largest params</option> | |
| </select> | |
| <label class="flex items-center gap-1.5 text-[11px] text-zinc-400"><input id="gpu-mainstream-only" type="checkbox" checked class="accent-indigo-400">mainstream only</label> | |
| </div> | |
| </div> | |
| <div id="gpu-models" class="overflow-x-auto"></div> | |
| <div class="px-5 py-2 border-t border-white/5 text-[11px] text-zinc-500" id="gpu-models-footer"></div> | |
| </div> | |
| <div class="rounded-2xl bg-zinc-900/30 grad-border glow overflow-hidden"> | |
| <div class="px-5 py-3 border-b border-white/5 text-sm font-medium">Bigger GPUs to consider</div> | |
| <div id="bigger-gpus" class="divide-y divide-white/5"></div> | |
| </div> | |
| </section> | |
| <!-- ============================================================ USAGE TAB --> | |
| <section data-pane="usage" class="space-y-6 hidden"> | |
| <div class="rounded-2xl bg-zinc-900/40 grad-border glow p-6 space-y-5"> | |
| <div class="flex items-start justify-between gap-4 flex-wrap"> | |
| <div> | |
| <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Connect your OpenRouter usage</div> | |
| <div class="text-lg font-medium tracking-tight mt-1">Use your real spend instead of guessing</div> | |
| <div class="text-xs text-zinc-500 mt-1">All three options are processed entirely in your browser. Your API key is only ever sent to <span class="mono">openrouter.ai</span>, never to any other server.</div> | |
| </div> | |
| <div id="usage-status" class="text-xs text-zinc-400"></div> | |
| </div> | |
| <!-- Sub-tabs for import method --> | |
| <div class="flex items-center gap-1 bg-zinc-950/60 rounded-lg p-1 text-xs"> | |
| <button data-utab="paste" data-active="true" class="utab-btn rounded-md px-3 py-1.5 text-zinc-400">① Paste / drop file</button> | |
| <button data-utab="connect" class="utab-btn rounded-md px-3 py-1.5 text-zinc-400">② Connect with API key</button> | |
| <button data-utab="bookmarklet" class="utab-btn rounded-md px-3 py-1.5 text-zinc-400">③ Bookmarklet</button> | |
| </div> | |
| <!-- ① Paste / drop --> | |
| <div data-upane="paste" class="space-y-3"> | |
| <div class="text-xs text-zinc-400">Run this once in your terminal, then paste the result below or drop the file:</div> | |
| <div class="flex items-stretch gap-2"> | |
| <code id="curl-snippet" class="flex-1 ghost-input rounded-md px-3 py-2 mono text-[11px] overflow-x-auto whitespace-nowrap">curl -s "https://openrouter.ai/api/v1/analytics" -H "Authorization: Bearer $OPENROUTER_API_KEY" > usage.json</code> | |
| <button id="copy-curl" class="ghost-input rounded-md px-3 text-xs hover:bg-white/5">copy</button> | |
| </div> | |
| <textarea id="usage-paste" placeholder='paste the full JSON response here (or drop the file anywhere on this card)' class="ghost-input rounded-md px-3 py-2 mono text-[11px] w-full h-40"></textarea> | |
| <div class="flex gap-2"> | |
| <button id="usage-load" class="ghost-input rounded-md px-4 py-1.5 text-xs hover:bg-indigo-500/15">Load</button> | |
| <input type="file" id="usage-file" accept=".json,application/json" class="hidden"> | |
| <button id="usage-pickfile" class="ghost-input rounded-md px-4 py-1.5 text-xs hover:bg-white/5">Pick file…</button> | |
| </div> | |
| </div> | |
| <!-- ② Connect --> | |
| <div data-upane="connect" class="space-y-3 hidden"> | |
| <div class="text-xs text-zinc-400">Paste your API key — it's stored only in your browser's localStorage and used to call <span class="mono">openrouter.ai</span> directly.</div> | |
| <div class="flex gap-2"> | |
| <input id="api-key" type="password" placeholder="sk-or-v1-…" autocomplete="off" class="ghost-input rounded-md px-3 py-2 mono text-xs flex-1"> | |
| <button id="api-fetch" class="ghost-input rounded-md px-4 py-1.5 text-xs hover:bg-indigo-500/15">Fetch usage</button> | |
| <button id="api-clear" class="ghost-input rounded-md px-3 py-1.5 text-xs hover:bg-rose-500/15 text-rose-300">Clear</button> | |
| </div> | |
| <div id="api-status" class="text-[11px] text-zinc-500"></div> | |
| <div class="text-[10px] text-zinc-600">Hits <span class="mono">/api/v1/credits</span>, <span class="mono">/api/v1/auth/key</span>, <span class="mono">/api/v1/analytics</span>. Verify in DevTools — no other origins are contacted.</div> | |
| </div> | |
| <!-- ③ Bookmarklet --> | |
| <div data-upane="bookmarklet" class="space-y-3 hidden"> | |
| <div class="text-xs text-zinc-400">Drag this link to your bookmarks bar. Then visit <span class="mono">openrouter.ai</span> while logged in and click the bookmark — it copies your usage to the clipboard. Paste it back here.</div> | |
| <div class="flex items-center gap-3"> | |
| <a id="bookmarklet" class="ghost-input rounded-md px-4 py-2 text-xs hover:bg-indigo-500/15 cursor-grab" draggable="true">📊 Grab my OpenRouter usage</a> | |
| <span class="text-[10px] text-zinc-500">drag to bookmarks bar →</span> | |
| </div> | |
| <textarea id="bm-paste" placeholder="…then paste the clipboard contents here" class="ghost-input rounded-md px-3 py-2 mono text-[11px] w-full h-32"></textarea> | |
| <button id="bm-load" class="ghost-input rounded-md px-4 py-1.5 text-xs hover:bg-indigo-500/15">Load</button> | |
| </div> | |
| </div> | |
| <!-- Aggregate KPIs --> | |
| <div id="usage-kpis" class="grid grid-cols-1 md:grid-cols-3 gap-4 hidden"></div> | |
| <!-- Open vs closed split --> | |
| <div id="usage-split" class="grid grid-cols-1 md:grid-cols-2 gap-4 hidden"></div> | |
| <!-- Per-model rows --> | |
| <div id="usage-rows" class="rounded-2xl bg-zinc-900/30 grad-border glow overflow-hidden hidden"> | |
| <div class="px-5 py-3 border-b border-white/5 flex items-center justify-between"> | |
| <div class="text-sm font-medium">Per-model · what it cost vs what it would cost on local hardware</div> | |
| <div id="usage-gpu-pick" class="text-[11px] text-zinc-500"></div> | |
| </div> | |
| <div id="usage-rows-body" class="divide-y divide-white/5"></div> | |
| </div> | |
| <div id="usage-empty" class="rounded-2xl bg-zinc-900/30 grad-border glow p-8 text-center text-sm text-zinc-500"> | |
| No usage data loaded yet · pick an option above | |
| </div> | |
| </section> | |
| <!-- ============================================================ BROWSE TAB --> | |
| <section data-pane="browse" class="grid grid-cols-12 gap-6 hidden"> | |
| <aside class="col-span-3 space-y-4"> | |
| <section class="rounded-xl bg-zinc-900/40 grad-border glow p-4 space-y-3 text-xs"> | |
| <div class="text-[11px] uppercase tracking-wider text-zinc-500">Browse · cost mode</div> | |
| <div class="grid grid-cols-3 gap-1 mono"> | |
| <button data-mode="payback" data-active="true" class="mode-btn rounded-md px-2 py-1.5 text-zinc-400">Payback</button> | |
| <button data-mode="amortized" class="mode-btn rounded-md px-2 py-1.5 text-zinc-400">Amort.</button> | |
| <button data-mode="tco" class="mode-btn rounded-md px-2 py-1.5 text-zinc-400">TCO</button> | |
| </div> | |
| <label class="block"> | |
| <div class="flex justify-between"><span>Tokens / day</span><span id="tpd-val3" class="mono text-zinc-300"></span></div> | |
| <input id="tpd3" type="range" min="3.5" max="10" step="0.05" value="6" class="w-full"> | |
| </label> | |
| <label class="block"> | |
| <div class="flex justify-between"><span>Max VRAM</span><span id="vram-val" class="mono text-zinc-300"></span></div> | |
| <input id="vram" type="range" min="8" max="200" step="1" value="200" class="w-full"> | |
| </label> | |
| <label class="flex items-center gap-2"><input id="open-only" type="checkbox" checked class="accent-indigo-400">Open-weight only</label> | |
| <label class="block"><input id="search" type="text" placeholder="search…" class="w-full ghost-input rounded-md px-2 py-1.5 mono"></label> | |
| </section> | |
| </aside> | |
| <section class="col-span-9 space-y-4"> | |
| <div id="kpis" class="grid grid-cols-3 gap-4"></div> | |
| <div class="rounded-xl bg-zinc-900/30 grad-border glow overflow-hidden"> | |
| <div class="px-4 py-3 border-b border-white/5 text-sm font-medium">Models · click row for providers</div> | |
| <div id="model-table" class="divide-y divide-white/5"></div> | |
| </div> | |
| </section> | |
| </section> | |
| <!-- ============================================================ TUNE DRAWER (shared) --> | |
| <details class="max-w-[1180px] mx-auto px-6 mt-2 mb-10"> | |
| <summary class="rounded-xl bg-zinc-900/30 grad-border glow px-5 py-3 text-sm flex items-center justify-between"> | |
| <span class="flex items-center gap-2"><span class="text-zinc-500">▸</span> Tune assumptions</span> | |
| <span class="text-[11px] text-zinc-500">engine · quant · electricity · amortization · I:O ratio</span> | |
| </summary> | |
| <div class="rounded-xl bg-zinc-950/40 grad-border glow mt-2 p-5 grid grid-cols-1 md:grid-cols-3 gap-5 text-xs"> | |
| <div class="space-y-3"> | |
| <div class="text-[11px] uppercase tracking-wider text-zinc-500">Inference engine</div> | |
| <select id="engine" class="w-full ghost-input rounded-md px-2 py-1.5 mono"></select> | |
| <div id="engine-help" class="text-[11px] text-zinc-500 leading-snug"></div> | |
| <div class="grid grid-cols-2 gap-1 mono"> | |
| <button data-conc="single" data-active="true" class="conc-btn rounded-md px-2 py-1 text-zinc-400">Single-stream</button> | |
| <button data-conc="batched" class="conc-btn rounded-md px-2 py-1 text-zinc-400">Batched serving</button> | |
| </div> | |
| <label class="block"> | |
| <div class="flex justify-between"><span>Speculative decode</span><span id="spec-val" class="mono text-zinc-300"></span></div> | |
| <input id="spec" type="range" min="1.0" max="2.5" step="0.05" value="1.0" class="w-full"> | |
| </label> | |
| </div> | |
| <div class="space-y-3"> | |
| <div class="text-[11px] uppercase tracking-wider text-zinc-500">Local stack</div> | |
| <select id="quant" class="w-full ghost-input rounded-md px-2 py-1.5 mono"></select> | |
| <div id="quant-help" class="text-[11px] text-zinc-500 leading-snug"></div> | |
| <label class="block"> | |
| <div class="flex justify-between"><span>KV cache quant</span></div> | |
| <div class="grid grid-cols-3 gap-1 mono mt-1"> | |
| <button data-kv="FP16" class="kv-btn rounded-md px-2 py-1 text-zinc-400">FP16</button> | |
| <button data-kv="Q8" data-active="true" class="kv-btn rounded-md px-2 py-1 text-zinc-400">Q8</button> | |
| <button data-kv="Q4" class="kv-btn rounded-md px-2 py-1 text-zinc-400">Q4</button> | |
| </div> | |
| </label> | |
| <label class="block"> | |
| <div class="flex justify-between"><span>Min TPS target</span><span id="mintps-val" class="mono text-zinc-300"></span></div> | |
| <input id="mintps" type="range" min="0" max="200" step="5" value="30" class="w-full"> | |
| </label> | |
| <label class="block"> | |
| <div class="flex justify-between"><span>Input:Output ratio</span><span id="io-val" class="mono text-zinc-300"></span></div> | |
| <input id="io" type="range" min="0" max="10" step="0.5" value="3" class="w-full"> | |
| <div class="text-[10px] text-zinc-600 mt-0.5">e.g. 3:1 = 3 input tokens for every 1 output token. Typical chat ≈ 3:1, agent loops ≈ 10:1+.</div> | |
| </label> | |
| <div class="grid grid-cols-2 gap-1 mono"> | |
| <button data-price="street" data-active="true" class="price-btn rounded-md px-2 py-1 text-zinc-400">Street</button> | |
| <button data-price="msrp" class="price-btn rounded-md px-2 py-1 text-zinc-400">MSRP</button> | |
| </div> | |
| </div> | |
| <div class="space-y-3"> | |
| <div class="text-[11px] uppercase tracking-wider text-zinc-500">Cost model</div> | |
| <label class="block"> | |
| <div class="flex justify-between"><span>Amortize over</span><span id="years-val" class="mono text-zinc-300"></span></div> | |
| <input id="years" type="range" min="1" max="7" step="0.5" value="3" class="w-full"> | |
| </label> | |
| <label class="block"> | |
| <div class="flex justify-between"><span>GPU utilization</span><span id="util-val" class="mono text-zinc-300"></span></div> | |
| <input id="util" type="range" min="0.05" max="1.0" step="0.05" value="0.4" class="w-full"> | |
| </label> | |
| <label class="block"> | |
| <div class="flex justify-between"><span>Electricity $/kWh</span><span id="kwh-val" class="mono text-zinc-300"></span></div> | |
| <input id="kwh" type="range" min="0.0" max="0.40" step="0.005" value="0.082" class="w-full"> | |
| <div class="text-[10px] text-zinc-600 mt-0.5">UAE ≈ 0.08 · US 0.16 · EU 0.30</div> | |
| </label> | |
| </div> | |
| </div> | |
| </details> | |
| <footer class="max-w-[1180px] mx-auto px-6 pb-12 text-[11px] text-zinc-600 leading-relaxed"> | |
| Methodology: API pricing from <span class="mono">openrouter.ai/api/v1/models/{id}/endpoints</span>. Local TPS = anchor (110 t/s on Qwen3.5-9B Q4_K_M @ RTX 5070 Ti) × bandwidth_ratio ÷ model_size_ratio × engine_multiplier × speculative. Memory-bound single-stream model, ±25% real-world. VRAM = params × bytes × 1.2 overhead. Verdict thresholds: BUY if payback &lt; 1y, MAYBE 1-3y, SKIP &gt; 3y. <strong>Point-in-time POC.</strong> Verify hardware prices before purchase. | |
| </footer> | |
| <script> | |
| const DATA = [{"id": "ibm-granite/granite-4.1-8b", "name": "IBM: Granite 4.1 8B", "hf": "ibm-granite/granite-4.1-8b", "context": 131072, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "WandB", "tag": "wandb/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "WandB", "tag": "wandb/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free", "name": "NVIDIA: Nemotron 3 Nano Omni (free)", "hf": null, "context": 256000, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.48, "providers": [{"provider": "Nvidia", "tag": "nvidia", "context": 256000, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.81825212683681, "uptime_1d": 97.62081971969573}], "cheapest": {"provider": "Nvidia", "tag": "nvidia", "context": 256000, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.81825212683681, "uptime_1d": 97.62081971969573}}, {"id": "qwen/qwen3.6-35b-a3b", "name": "Qwen: Qwen3.6 35B A3B", "hf": "Qwen/Qwen3.6-35B-A3B", "context": 262144, "open_weight": true, "params_total_b": 35.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.14, "providers": [{"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, 
"prompt_per_mtok": 0.16119999999999998, "completion_per_mtok": 0.9652499999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94186046511628, "uptime_1d": 99.94077207826547}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.7656680490705}, {"provider": "AkashML", "tag": "akashml/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.22999999999999998, "completion_per_mtok": 1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92692224131461}], "cheapest": {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.16119999999999998, "completion_per_mtok": 0.9652499999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94186046511628, "uptime_1d": 99.94077207826547}}, {"id": "qwen/qwen3.6-27b", "name": "Qwen: Qwen3.6 27B", "hf": "Qwen/Qwen3.6-27B", "context": 262144, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "Chutes", "tag": "chutes/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.5, "completion_per_mtok": 2.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.48717948717949, "uptime_1d": 98.82017631073109}, {"provider": "Morph", "tag": "morph", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.55, "completion_per_mtok": 2.4, "throughput_tps": null, "latency_ms": null, "uptime_30m": 44.611973392461195, "uptime_1d": 95.61675882603147}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 81920, 
"prompt_per_mtok": 0.32, "completion_per_mtok": 3.1999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95503597122301, "uptime_1d": 98.80547562995902}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.325, "completion_per_mtok": 3.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 96.79561793906196}, {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.6, "completion_per_mtok": 3.5999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.54010631308606}], "cheapest": {"provider": "Chutes", "tag": "chutes/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.5, "completion_per_mtok": 2.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.48717948717949, "uptime_1d": 98.82017631073109}}, {"id": "google/gemma-4-26b-a4b-it:free", "name": "Google: Gemma 4 26B A4B (free)", "hf": "google/gemma-4-26B-A4B-it", "context": 262144, "open_weight": true, "params_total_b": 26.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.10400000000000001, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 262144, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.87583083777665}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 262144, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.87583083777665}}, {"id": "google/gemma-4-26b-a4b-it", "name": "Google: Gemma 4 26B A4B ", "hf": "google/gemma-4-26B-A4B-it", "context": 262144, 
"open_weight": true, "params_total_b": 26.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.10400000000000001, "providers": [{"provider": "DekaLLM", "tag": "dekallm/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.33, "throughput_tps": null, "latency_ms": null, "uptime_30m": 87.92884371029224, "uptime_1d": 67.14721482834327}, {"provider": "Cloudflare", "tag": "cloudflare", "context": 256000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94018244354717, "uptime_1d": 99.87809761613116}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.33999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.7280629340067, "uptime_1d": 99.7397940910503}, {"provider": "Ionstream", "tag": "ionstream/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.35, "throughput_tps": null, "latency_ms": null, "uptime_30m": 63.57952325127003, "uptime_1d": 49.62655135620823}, {"provider": "Parasail", "tag": "parasail/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.82595431466203}, {"provider": "Novita", "tag": "novita/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.48339483394834, "uptime_1d": 99.81930536943634}, {"provider": "NextBit", "tag": "nextbit/bf16", "context": 262144, "quantization": "bf16", 
"max_completion_tokens": 262144, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.12445364619278, "uptime_1d": 99.00234285917048}, {"provider": "Io Net", "tag": "io-net/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.66058549002969, "uptime_1d": 97.35400038703527}, {"provider": "Venice", "tag": "venice/bf16", "context": 256000, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.1625, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.35728447055057}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.76572142801344, "uptime_1d": 99.76022617471281}], "cheapest": {"provider": "DekaLLM", "tag": "dekallm/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.33, "throughput_tps": null, "latency_ms": null, "uptime_30m": 87.92884371029224, "uptime_1d": 67.14721482834327}}, {"id": "google/gemma-4-31b-it:free", "name": "Google: Gemma 4 31B (free)", "hf": "google/gemma-4-31B-it", "context": 262144, "open_weight": true, "params_total_b": 31.0, "params_active_b": 31.0, "kv_gb_per_1k": 0.124, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 262144, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.91177767975297}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 262144, "quantization": "unknown", 
"max_completion_tokens": 32768, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.91177767975297}}, {"id": "google/gemma-4-31b-it", "name": "Google: Gemma 4 31B", "hf": "google/gemma-4-31B-it", "context": 262144, "open_weight": true, "params_total_b": 31.0, "params_active_b": 31.0, "kv_gb_per_1k": 0.124, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.38, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.7056614497696, "uptime_1d": 99.53112885595147}, {"provider": "Chutes", "tag": "chutes/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 65536, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.38, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.65674782085964, "uptime_1d": 96.84352083731063}, {"provider": "Novita", "tag": "novita/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.55222665472604, "uptime_1d": 98.23305193498936}, {"provider": "Parasail", "tag": "parasail/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.8361738024794, "uptime_1d": 99.55852663839724}, {"provider": "Venice", "tag": "venice/bf16", "context": 256000, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.175, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96374184191443, "uptime_1d": 98.14838462520751}, {"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 
0.19999999999999998, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.60205992509363, "uptime_1d": 99.05116165942282}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.38, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.7056614497696, "uptime_1d": 99.53112885595147}}, {"id": "nvidia/nemotron-3-super-120b-a12b:free", "name": "NVIDIA: Nemotron 3 Super (free)", "hf": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8", "context": 262144, "open_weight": true, "params_total_b": 120.0, "params_active_b": 12.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "Nvidia", "tag": "nvidia", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.13816189896531, "uptime_1d": 98.56685729155443}], "cheapest": {"provider": "Nvidia", "tag": "nvidia", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.13816189896531, "uptime_1d": 98.56685729155443}}, {"id": "nvidia/nemotron-3-super-120b-a12b", "name": "NVIDIA: Nemotron 3 Super", "hf": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8", "context": 262144, "open_weight": true, "params_total_b": 120.0, "params_active_b": 12.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "DekaLLM", "tag": "dekallm/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.16168271667512, "uptime_1d": 78.29774218663108}, {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 16384, 
"prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.21918572225321, "uptime_1d": 99.77634402608622}, {"provider": "Nebius", "tag": "nebius/fp4", "context": 262144, "quantization": "fp4", "max_completion_tokens": null, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.8999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.10126582278481, "uptime_1d": 98.45385347288297}], "cheapest": {"provider": "DekaLLM", "tag": "dekallm/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.16168271667512, "uptime_1d": 78.29774218663108}}, {"id": "qwen/qwen3.5-9b", "name": "Qwen: Qwen3.5-9B", "hf": "Qwen/Qwen3.5-9B", "context": 262144, "open_weight": true, "params_total_b": 9.0, "params_active_b": 9.0, "kv_gb_per_1k": 0.036000000000000004, "providers": [{"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 92.4863387978142, "uptime_1d": 96.8969518580364}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.85093557704383}], "cheapest": {"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 92.4863387978142, "uptime_1d": 96.8969518580364}}, {"id": "qwen/qwen3.5-35b-a3b", "name": "Qwen: Qwen3.5-35B-A3B", "hf": "Qwen/Qwen3.5-35B-A3B", "context": 
262144, "open_weight": true, "params_total_b": 35.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.14, "providers": [{"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.97316876844647, "uptime_1d": 99.97374382207578}, {"provider": "DekaLLM", "tag": "dekallm/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.19, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 76.9607843137255}, {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.1625, "completion_per_mtok": 1.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 89.35596765758879}, {"provider": "Venice", "tag": "venice", "context": 256000, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3125, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.18963845407951}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.22499999999999998, "completion_per_mtok": 1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.70370370370371, "uptime_1d": 99.62541116977069}, {"provider": "AkashML", "tag": "akashml/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.22999999999999998, "completion_per_mtok": 1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.96597125880167}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.3, "completion_per_mtok": 
1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 90.98641993732278}], "cheapest": {"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.97316876844647, "uptime_1d": 99.97374382207578}}, {"id": "qwen/qwen3.5-27b", "name": "Qwen: Qwen3.5-27B", "hf": "Qwen/Qwen3.5-27B", "context": 262144, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.195, "completion_per_mtok": 1.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.55936413313464, "uptime_1d": 94.58952366365446}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.27, "completion_per_mtok": 2.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.51768488745981, "uptime_1d": 98.97541385052405}, {"provider": "Phala", "tag": "phala", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.3, "completion_per_mtok": 2.4, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 85.5402042392517}, {"provider": "Novita", "tag": "novita/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, "completion_per_mtok": 2.4, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.8542539624704, "uptime_1d": 99.75450996412069}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.195, "completion_per_mtok": 1.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 
98.55936413313464, "uptime_1d": 94.58952366365446}}, {"id": "qwen/qwen3.5-122b-a10b", "name": "Qwen: Qwen3.5-122B-A10B", "hf": "Qwen/Qwen3.5-122B-A10B", "context": 262144, "open_weight": true, "params_total_b": 122.0, "params_active_b": 10.0, "kv_gb_per_1k": 0.488, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.26, "completion_per_mtok": 2.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86768111147867, "uptime_1d": 99.91762490753815}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, "completion_per_mtok": 2.4, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.80376124284545}, {"provider": "Novita", "tag": "novita/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 65536, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 3.1999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92791139655284}, {"provider": "Venice", "tag": "venice", "context": 128000, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.5, "completion_per_mtok": 4.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.84482758620689, "uptime_1d": 96.0263616980035}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.26, "completion_per_mtok": 2.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86768111147867, "uptime_1d": 99.91762490753815}}, {"id": "liquid/lfm-2-24b-a2b", "name": "LiquidAI: LFM2-24B-A2B", "hf": "LiquidAI/LFM2-24B-A2B", "context": 32768, "open_weight": true, "params_total_b": 24.0, "params_active_b": 2.0, "kv_gb_per_1k": 0.384, "providers": [{"provider": "Together", "tag": "together", "context": 32768, 
"quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.12, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Together", "tag": "together", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.12, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3.5-397b-a17b", "name": "Qwen: Qwen3.5 397B A17B", "hf": "Qwen/Qwen3.5-397B-A17B", "context": 262144, "open_weight": true, "params_total_b": 397.0, "params_active_b": 17.0, "kv_gb_per_1k": 1.588, "providers": [{"provider": "Chutes", "tag": "chutes/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.39, "completion_per_mtok": 2.34, "throughput_tps": null, "latency_ms": null, "uptime_30m": 71.70418006430869, "uptime_1d": 81.89601276855522}, {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.39, "completion_per_mtok": 2.34, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95128531814434, "uptime_1d": 99.35039110484392}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.55, "completion_per_mtok": 3.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.6135169953424, "uptime_1d": 99.15288328223555}, {"provider": "Morph", "tag": "morph", "context": 262144, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.55, "completion_per_mtok": 3.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.76261803972646, "uptime_1d": 87.50405550425516}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.5, "completion_per_mtok": 
3.5999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96552912788694, "uptime_1d": 99.95901333929503}, {"provider": "Novita", "tag": "novita", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.6, "completion_per_mtok": 3.5999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.19583727530747, "uptime_1d": 99.38172203335918}, {"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.6, "completion_per_mtok": 3.5999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 85.56473829201103, "uptime_1d": 89.45778239823268}, {"provider": "Venice", "tag": "venice", "context": 128000, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.75, "completion_per_mtok": 4.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.42857142857143, "uptime_1d": 93.84131493506493}], "cheapest": {"provider": "Chutes", "tag": "chutes/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.39, "completion_per_mtok": 2.34, "throughput_tps": null, "latency_ms": null, "uptime_30m": 71.70418006430869, "uptime_1d": 81.89601276855522}}, {"id": "liquid/lfm-2.5-1.2b-thinking:free", "name": "LiquidAI: LFM2.5-1.2B-Thinking (free)", "hf": "LiquidAI/LFM2.5-1.2B-Thinking", "context": 32768, "open_weight": true, "params_total_b": 1.2, "params_active_b": 1.2, "kv_gb_per_1k": 0.0192, "providers": [{"provider": "Liquid", "tag": "liquid", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 92.04306800366788}], "cheapest": {"provider": "Liquid", "tag": "liquid", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 
0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 92.04306800366788}}, {"id": "liquid/lfm-2.5-1.2b-instruct:free", "name": "LiquidAI: LFM2.5-1.2B-Instruct (free)", "hf": "LiquidAI/LFM2.5-1.2B-Instruct", "context": 32768, "open_weight": true, "params_total_b": 1.2, "params_active_b": 1.2, "kv_gb_per_1k": 0.0192, "providers": [{"provider": "Liquid", "tag": "liquid", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 93.35464044253227}], "cheapest": {"provider": "Liquid", "tag": "liquid", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 93.35464044253227}}, {"id": "allenai/olmo-3.1-32b-instruct", "name": "AllenAI: Olmo 3.1 32B Instruct", "hf": "allenai/Olmo-3.1-32B-Instruct", "context": 65536, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.512, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 65536, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 65536, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "nvidia/nemotron-3-nano-30b-a3b:free", "name": "NVIDIA: Nemotron 3 Nano 30B A3B (free)", "hf": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "context": 256000, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.48, "providers": [{"provider": 
"Nvidia", "tag": "nvidia/bf16", "context": 256000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98661597618367}], "cheapest": {"provider": "Nvidia", "tag": "nvidia/bf16", "context": 256000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98661597618367}}, {"id": "nvidia/nemotron-3-nano-30b-a3b", "name": "NVIDIA: Nemotron 3 Nano 30B A3B", "hf": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "context": 262144, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.48, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp4", "context": 262144, "quantization": "fp4", "max_completion_tokens": 228000, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp4", "context": 262144, "quantization": "fp4", "max_completion_tokens": 228000, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/ministral-14b-2512", "name": "Mistral: Ministral 3 14B 2512", "hf": "mistralai/Ministral-3-14B-Instruct-2512", "context": 262144, "open_weight": true, "params_total_b": 14.0, "params_active_b": 14.0, "kv_gb_per_1k": 0.056, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.8841642228739, "uptime_1d": 99.11579237942428}, {"provider": "NextBit", "tag": 
"nextbit/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.35, "throughput_tps": null, "latency_ms": null, "uptime_30m": 88.1578947368421, "uptime_1d": 97.52548071871388}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.8841642228739, "uptime_1d": 99.11579237942428}}, {"id": "mistralai/ministral-8b-2512", "name": "Mistral: Ministral 3 8B 2512", "hf": "mistralai/Ministral-3-8B-Instruct-2512", "context": 262144, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 88.03027285401315, "uptime_1d": 98.71209967718057}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 87.10968775020017, "uptime_1d": 95.64102267869727}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 88.03027285401315, "uptime_1d": 98.71209967718057}}, {"id": "mistralai/ministral-3b-2512", "name": "Mistral: Ministral 3 3B 2512", "hf": "mistralai/Ministral-3-3B-Instruct-2512", "context": 131072, "open_weight": true, "params_total_b": 3.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.012, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 
131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.73936671255706, "uptime_1d": 99.81908279573487}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.66287399915718, "uptime_1d": 99.6376727261442}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.73936671255706, "uptime_1d": 99.81908279573487}}, {"id": "allenai/olmo-3-32b-think", "name": "AllenAI: Olmo 3 32B Think", "hf": "allenai/Olmo-3-32B-Think", "context": 65536, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.512, "providers": [], "cheapest": null}, {"id": "deepcogito/cogito-v2.1-671b", "name": "Deep Cogito: Cogito v2.1 671B", "hf": "", "context": 128000, "open_weight": false, "params_total_b": 671.0, "params_active_b": 671.0, "kv_gb_per_1k": 10.736, "providers": [{"provider": "Together", "tag": "together", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 1.25, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Together", "tag": "together", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 1.25, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/voxtral-small-24b-2507", "name": "Mistral: Voxtral Small 24B 2507", "hf": "mistralai/Voxtral-Small-24B-2507", 
"context": 32000, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 32000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.49579831932773, "uptime_1d": 99.83273090825195}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 32000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.49579831932773, "uptime_1d": 99.83273090825195}}, {"id": "openai/gpt-oss-safeguard-20b", "name": "OpenAI: gpt-oss-safeguard-20b", "hf": "openai/gpt-oss-safeguard-20b", "context": 131072, "open_weight": true, "params_total_b": 20.0, "params_active_b": 20.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "nvidia/nemotron-nano-12b-v2-vl:free", "name": "NVIDIA: Nemotron Nano 12B 2 VL (free)", "hf": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", "context": 128000, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "Nvidia", "tag": "nvidia", "context": 128000, "quantization": "unknown", "max_completion_tokens": 128000, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 
99.94242947610823, "uptime_1d": 93.32141994687274}], "cheapest": {"provider": "Nvidia", "tag": "nvidia", "context": 128000, "quantization": "unknown", "max_completion_tokens": 128000, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94242947610823, "uptime_1d": 93.32141994687274}}, {"id": "nvidia/nemotron-nano-12b-v2-vl", "name": "NVIDIA: Nemotron Nano 12B 2 VL", "hf": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", "context": 131072, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "qwen/qwen3-vl-32b-instruct", "name": "Qwen: Qwen3 VL 32B Instruct", "hf": "Qwen/Qwen3-VL-32B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.10400000000000001, "completion_per_mtok": 0.41600000000000004, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.10400000000000001, "completion_per_mtok": 0.41600000000000004, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, 
{"id": "qwen/qwen3-vl-8b-thinking", "name": "Qwen: Qwen3 VL 8B Thinking", "hf": "Qwen/Qwen3-VL-8B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.117, "completion_per_mtok": 1.365, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.117, "completion_per_mtok": 1.365, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "qwen/qwen3-vl-8b-instruct", "name": "Qwen: Qwen3 VL 8B Instruct", "hf": "Qwen/Qwen3-VL-8B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.117, "completion_per_mtok": 0.45499999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99264920580795}, {"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.77595103804153, "uptime_1d": 98.67297004497782}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 32000, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.62881896944823, "uptime_1d": 98.54493264902968}, {"provider": "Parasail", "tag": "parasail/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 
0.25, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 57.03141038801067, "uptime_1d": 93.08841169202464}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.117, "completion_per_mtok": 0.45499999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99264920580795}}, {"id": "nvidia/llama-3.3-nemotron-super-49b-v1.5", "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", "hf": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "context": 131072, "open_weight": true, "params_total_b": 49.0, "params_active_b": 49.0, "kv_gb_per_1k": 0.196, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98701298701299}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98701298701299}}, {"id": "baidu/ernie-4.5-21b-a3b-thinking", "name": "Baidu: ERNIE 4.5 21B A3B Thinking", "hf": "baidu/ERNIE-4.5-21B-A3B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 21.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.336, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 65536, 
"prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "qwen/qwen3-vl-30b-a3b-thinking", "name": "Qwen: Qwen3 VL 30B A3B Thinking", "hf": "Qwen/Qwen3-VL-30B-A3B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "Novita", "tag": "novita/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.5791518290709}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.29, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 82.30994152046783}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 1.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.5791518290709}}, {"id": "qwen/qwen3-vl-30b-a3b-instruct", "name": "Qwen: Qwen3 VL 30B A3B Instruct", "hf": "Qwen/Qwen3-VL-30B-A3B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.52, "throughput_tps": null, "latency_ms": null, "uptime_30m": 
99.98262682418346, "uptime_1d": 99.98423816759241}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 32000, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 81.08558108558108, "uptime_1d": 91.8072598504381}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.9204665959703, "uptime_1d": 99.45929958500088}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.7, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.23119165293795, "uptime_1d": 94.2152584953061}, {"provider": "Phala", "tag": "phala", "context": 128000, "quantization": "unknown", "max_completion_tokens": 128000, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.7, "throughput_tps": null, "latency_ms": null, "uptime_30m": 90.73251942286349, "uptime_1d": 97.9021590789796}, {"provider": "Venice", "tag": "venice", "context": 128000, "quantization": "unknown", "max_completion_tokens": 4096, "prompt_per_mtok": 0.25, "completion_per_mtok": 0.8999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 92.08633093525181, "uptime_1d": 94.28436871173524}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.29, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 93.22766570605188, "uptime_1d": 92.87602114368092}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.52, 
"throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98262682418346, "uptime_1d": 99.98423816759241}}, {"id": "thedrummer/cydonia-24b-v4.1", "name": "TheDrummer: Cydonia 24B V4.1", "hf": "thedrummer/cydonia-24b-v4.1", "context": 131072, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.384, "providers": [{"provider": "Parasail", "tag": "parasail/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Parasail", "tag": "parasail/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-vl-235b-a22b-thinking", "name": "Qwen: Qwen3 VL 235B A22B Thinking", "hf": "Qwen/Qwen3-VL-235B-A22B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.26, "completion_per_mtok": 2.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86282578875172, "uptime_1d": 99.6266156055529}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.98, "completion_per_mtok": 3.95, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98378071527046}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.26, "completion_per_mtok": 2.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86282578875172, "uptime_1d": 
99.6266156055529}}, {"id": "qwen/qwen3-vl-235b-a22b-instruct", "name": "Qwen: Qwen3 VL 235B A22B Instruct", "hf": "Qwen/Qwen3-VL-235B-A22B-Instruct", "context": 262144, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 25.523560209424083, "uptime_1d": 82.78650453427547}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.26, "completion_per_mtok": 1.04, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95923359152059, "uptime_1d": 99.7187006009738}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.25, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.59475218658892, "uptime_1d": 93.93047112462007}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 87.46155996645233, "uptime_1d": 87.45459364233041}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 74.50090744101632, "uptime_1d": 85.02783074121258}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.21, "completion_per_mtok": 1.9, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.25023488881929, "uptime_1d": 
97.22329817101271}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 25.523560209424083, "uptime_1d": 82.78650453427547}}, {"id": "alibaba/tongyi-deepresearch-30b-a3b", "name": "Tongyi DeepResearch 30B A3B", "hf": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", "context": 131072, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.48, "providers": [{"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-next-80b-a3b-thinking", "name": "Qwen: Qwen3 Next 80B A3B Thinking", "hf": "Qwen/Qwen3-Next-80B-A3B-Thinking", "context": 131072, "open_weight": true, "params_total_b": 80.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0975, "completion_per_mtok": 0.78, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.42528735632183}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}, {"provider": "Google", "tag": "google-vertex", 
"context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.17355371900827}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.89270386266095}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.77011494252874}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0975, "completion_per_mtok": 0.78, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.42528735632183}}, {"id": "qwen/qwen3-next-80b-a3b-instruct:free", "name": "Qwen: Qwen3 Next 80B A3B Instruct (free)", "hf": "Qwen/Qwen3-Next-80B-A3B-Instruct", "context": 262144, "open_weight": true, "params_total_b": 80.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "Venice", "tag": "venice/beta", "context": 262144, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 63.48641655886158}], "cheapest": {"provider": "Venice", "tag": "venice/beta", "context": 262144, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 63.48641655886158}}, {"id": "qwen/qwen3-next-80b-a3b-instruct", "name": "Qwen: Qwen3 Next 80B A3B Instruct", "hf": 
"Qwen/Qwen3-Next-80B-A3B-Instruct", "context": 262144, "open_weight": true, "params_total_b": 80.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.0975, "completion_per_mtok": 0.78, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96499008052282, "uptime_1d": 99.38025615176959}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09, "completion_per_mtok": 1.1, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92704449749216}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 1.1, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98475609756098, "uptime_1d": 99.87787358738768}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98994344279677}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.96375902539656}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.15, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.677245831092, "uptime_1d": 95.45495053916763}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, 
"prompt_per_mtok": 0.0975, "completion_per_mtok": 0.78, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96499008052282, "uptime_1d": 99.38025615176959}}, {"id": "nvidia/nemotron-nano-9b-v2:free", "name": "NVIDIA: Nemotron Nano 9B V2 (free)", "hf": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "context": 128000, "open_weight": true, "params_total_b": 9.0, "params_active_b": 9.0, "kv_gb_per_1k": 0.14400000000000002, "providers": [{"provider": "Nvidia", "tag": "nvidia/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.93698802772528, "uptime_1d": 98.51666154715075}], "cheapest": {"provider": "Nvidia", "tag": "nvidia/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.93698802772528, "uptime_1d": 98.51666154715075}}, {"id": "nvidia/nemotron-nano-9b-v2", "name": "NVIDIA: Nemotron Nano 9B V2", "hf": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "context": 131072, "open_weight": true, "params_total_b": 9.0, "params_active_b": 9.0, "kv_gb_per_1k": 0.14400000000000002, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-30b-a3b-thinking-2507", "name": "Qwen: Qwen3 30B A3B Thinking 2507", "hf": "Qwen/Qwen3-30B-A3B-Thinking-2507", "context": 131072, "open_weight": true, "params_total_b": 
30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 98.68203691733103}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.97065823362567}, {"provider": "Alibaba", "tag": "alibaba", "context": 81920, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 1.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 98.68203691733103}}, {"id": "nousresearch/hermes-4-70b", "name": "Nous: Hermes 4 70B", "hf": "NousResearch/Hermes-4-70B", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": 
"nousresearch/hermes-4-405b", "name": "Nous: Hermes 4 405B", "hf": "NousResearch/Hermes-4-405B", "context": 131072, "open_weight": true, "params_total_b": 405.0, "params_active_b": 405.0, "kv_gb_per_1k": 6.48, "providers": [{"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 1.0, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 1.0, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "baidu/ernie-4.5-21b-a3b", "name": "Baidu: ERNIE 4.5 21B A3B", "hf": "baidu/ERNIE-4.5-21B-A3B-PT", "context": 120000, "open_weight": true, "params_total_b": 21.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.336, "providers": [{"provider": "Novita", "tag": "novita/bf16", "context": 120000, "quantization": "bf16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], "cheapest": {"provider": "Novita", "tag": "novita/bf16", "context": 120000, "quantization": "bf16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "baidu/ernie-4.5-vl-28b-a3b", "name": "Baidu: ERNIE 4.5 VL 28B A3B", "hf": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "context": 30000, "open_weight": true, "params_total_b": 28.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.448, "providers": [{"provider": "Novita", "tag": "novita/fp16", "context": 30000, "quantization": "fp16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], 
"cheapest": {"provider": "Novita", "tag": "novita/fp16", "context": 30000, "quantization": "fp16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "openai/gpt-oss-120b:free", "name": "OpenAI: gpt-oss-120b (free)", "hf": "openai/gpt-oss-120b", "context": 131072, "open_weight": true, "params_total_b": 120.0, "params_active_b": 120.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "OpenInference", "tag": "open-inference/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.87826034026234, "uptime_1d": 99.3890139848677}], "cheapest": {"provider": "OpenInference", "tag": "open-inference/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.87826034026234, "uptime_1d": 99.3890139848677}}, {"id": "openai/gpt-oss-120b", "name": "OpenAI: gpt-oss-120b", "hf": "openai/gpt-oss-120b", "context": 131072, "open_weight": true, "params_total_b": 120.0, "params_active_b": 120.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "DekaLLM", "tag": "dekallm/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.039, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.5614476485388, "uptime_1d": 47.60379344547681}, {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.039, "completion_per_mtok": 0.19, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99256422649367, "uptime_1d": 99.51077552678176}, {"provider": "Novita", "tag": "novita/fp4", "context": 131072, "quantization": "fp4", 
"max_completion_tokens": 32768, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99120492524186, "uptime_1d": 98.2916975651959}, {"provider": "Google", "tag": "google-vertex", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.36, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.74634671588987, "uptime_1d": 99.00415432626369}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.07390300230946, "uptime_1d": 92.51337360538574}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.94251536345345}, {"provider": "Phala", "tag": "phala", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.49, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.47937525030036, "uptime_1d": 96.72812408545508}, {"provider": "BaseTen", "tag": "baseten/fp4", "context": 128072, "quantization": "fp4", "max_completion_tokens": 128072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99271366526058}, {"provider": "Io Net", "tag": "io-net/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.175, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.41656942823805, "uptime_1d": 98.77918419980996}, 
{"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.92728594800944, "uptime_1d": 99.95947194326072}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.86095661846495, "uptime_1d": 99.89755847703603}, {"provider": "Together", "tag": "together", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95662546085448, "uptime_1d": 99.07211291024981}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.97408181000442, "uptime_1d": 99.97658850013684}, {"provider": "Fireworks", "tag": "fireworks", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.99063983419136}, {"provider": "WandB", "tag": "wandb/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 131072, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96481763809078, "uptime_1d": 99.98796143807846}, {"provider": "Nebius", "tag": "nebius/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.91860654403386, "uptime_1d": 99.94320819112627}, {"provider": 
"DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.9613501674826}, {"provider": "Parasail", "tag": "parasail/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98392412185517, "uptime_1d": 99.87735951899775}, {"provider": "SambaNova", "tag": "sambanova", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.95, "throughput_tps": null, "latency_ms": null, "uptime_30m": 92.77728743748162, "uptime_1d": 96.00370780246493}, {"provider": "Cerebras", "tag": "cerebras/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 40960, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.92981361615847, "uptime_1d": 99.96287358455541}], "cheapest": {"provider": "DekaLLM", "tag": "dekallm/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.039, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.5614476485388, "uptime_1d": 47.60379344547681}}, {"id": "openai/gpt-oss-20b:free", "name": "OpenAI: gpt-oss-20b (free)", "hf": "openai/gpt-oss-20b", "context": 131072, "open_weight": true, "params_total_b": 20.0, "params_active_b": 20.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "OpenInference", "tag": "open-inference/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.9381857518158, "uptime_1d": 96.28670545652344}], "cheapest": {"provider": 
"OpenInference", "tag": "open-inference/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.9381857518158, "uptime_1d": 96.28670545652344}}, {"id": "openai/gpt-oss-20b", "name": "OpenAI: gpt-oss-20b", "hf": "openai/gpt-oss-20b", "context": 131072, "open_weight": true, "params_total_b": 20.0, "params_active_b": 20.0, "kv_gb_per_1k": 0.32, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.14, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.02940690566662, "uptime_1d": 99.93134805131857}, {"provider": "Novita", "tag": "novita/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 32768, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.18494200043773, "uptime_1d": 96.24104528568287}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.15384615384616, "uptime_1d": 83.29677683185248}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.4768962510898, "uptime_1d": 81.09778413627895}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.15, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.88151658767772, "uptime_1d": 99.8191531951903}, {"provider": "Parasail", "tag": "parasail/fp4", 
"context": 131072, "quantization": "fp4", "max_completion_tokens": 131072, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95689895594059}, {"provider": "Together", "tag": "together", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.92732558139535, "uptime_1d": 99.61435775734203}, {"provider": "WandB", "tag": "wandb/fp4", "context": 131072, "quantization": "fp4", "max_completion_tokens": 131072, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99493978342274, "uptime_1d": 99.98588994154404}, {"provider": "Google", "tag": "google-vertex", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99505293361037}, {"provider": "Fireworks", "tag": "fireworks", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.86135693215338, "uptime_1d": 99.01289679174496}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.71052948980824, "uptime_1d": 99.71441514793479}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.42693409742121, 
"uptime_1d": 99.73719558488582}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 131072, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.14, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.02940690566662, "uptime_1d": 99.93134805131857}}, {"id": "qwen/qwen3-coder-30b-a3b-instruct", "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "hf": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "context": 160000, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 160000, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.27, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.72602739726028, "uptime_1d": 99.78641069454216}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, "uptime_30m": 89.8876404494382, "uptime_1d": 94.39538787126907}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 0, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}, {"provider": "Alibaba", "tag": "alibaba", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.2925, "completion_per_mtok": 1.4625, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.8025666337611}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 160000, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.07, "completion_per_mtok": 0.27, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.72602739726028, "uptime_1d": 
99.78641069454216}}, {"id": "qwen/qwen3-30b-a3b-instruct-2507", "name": "Qwen: Qwen3 30B A3B Instruct 2507", "hf": "Qwen/Qwen3-30B-A3B-Instruct-2507", "context": 262144, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 72.49427917620137, "uptime_1d": 80.1509162773985}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96983135728723, "uptime_1d": 98.53410967656384}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.70758784754919, "uptime_1d": 99.72369259153507}, {"provider": "WandB", "tag": "wandb/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99127678753162, "uptime_1d": 99.95009143553794}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.52, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.91720483523763, "uptime_1d": 99.78613389642383}, {"provider": "Venice", "tag": "venice", "context": 256000, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.19, "completion_per_mtok": 0.69, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.68926553672316, 
"uptime_1d": 94.74492683321206}], "cheapest": {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 72.49427917620137, "uptime_1d": 80.1509162773985}}, {"id": "qwen/qwen3-235b-a22b-thinking-2507", "name": "Qwen: Qwen3 235B A22B Thinking 2507", "hf": "Qwen/Qwen3-235B-A22B-Thinking-2507", "context": 131072, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.14950000000000002, "completion_per_mtok": 1.495, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.96027692656907}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.22999999999999998, "completion_per_mtok": 2.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.83748645720478}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.28, "completion_per_mtok": 2.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.79536152796726}, {"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.3, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.50093574547722}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.14950000000000002, "completion_per_mtok": 1.495, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, 
"uptime_1d": 99.96027692656907}}, {"id": "z-ai/glm-4-32b", "name": "Z.ai: GLM 4 32B ", "hf": "", "context": 128000, "open_weight": false, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.512, "providers": [{"provider": "Z.AI", "tag": "z-ai", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Z.AI", "tag": "z-ai", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-coder:free", "name": "Qwen: Qwen3 Coder 480B A35B (free)", "hf": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "context": 262000, "open_weight": true, "params_total_b": 480.0, "params_active_b": 35.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "Venice", "tag": "venice/beta", "context": 262000, "quantization": "fp8", "max_completion_tokens": 262000, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 48.627234199373504}], "cheapest": {"provider": "Venice", "tag": "venice/beta", "context": 262000, "quantization": "fp8", "max_completion_tokens": 262000, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 48.627234199373504}}, {"id": "qwen/qwen3-coder", "name": "Qwen: Qwen3 Coder 480B A35B", "hf": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "context": 262144, "open_weight": true, "params_total_b": 480.0, "params_active_b": 35.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 262144, "quantization": "fp4", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, 
"completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 77.96143250688705, "uptime_1d": 98.13233599134382}, {"provider": "Novita", "tag": "novita/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.52681388012618, "uptime_1d": 99.58604376108812}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 65536, "prompt_per_mtok": 0.35, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 40.76923076923077, "uptime_1d": 92.72514315460697}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.22, "completion_per_mtok": 1.7999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.9010989010989, "uptime_1d": 99.77141257697332}, {"provider": "WandB", "tag": "wandb/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 1.0, "completion_per_mtok": 1.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 97.53236245954693}, {"provider": "Together", "tag": "together/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 2.0, "completion_per_mtok": 2.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.14563106796116, "uptime_1d": 97.22355193872666}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.78, "completion_per_mtok": 3.8, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.57645065650148}, {"provider": "Alibaba", "tag": "alibaba/opensource", "context": 262144, "quantization": "unknown", "max_completion_tokens": 65536, "prompt_per_mtok": 0.975, 
"completion_per_mtok": 4.875, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.75429975429975, "uptime_1d": 99.56225680933852}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 262144, "quantization": "fp4", "max_completion_tokens": 65536, "prompt_per_mtok": 0.3, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 77.96143250688705, "uptime_1d": 98.13233599134382}}, {"id": "bytedance/ui-tars-1.5-7b", "name": "ByteDance: UI-TARS 7B ", "hf": "ByteDance-Seed/UI-TARS-1.5-7B", "context": 128000, "open_weight": true, "params_total_b": 7.0, "params_active_b": 7.0, "kv_gb_per_1k": 0.112, "providers": [{"provider": "Parasail", "tag": "parasail/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": 2048, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Parasail", "tag": "parasail/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": 2048, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen3-235b-a22b-2507", "name": "Qwen: Qwen3 235B A22B Instruct 2507", "hf": "Qwen/Qwen3-235B-A22B-Instruct-2507", "context": 262144, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.071, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.6719810227837, "uptime_1d": 97.27675475972455}, {"provider": "WandB", "tag": "wandb/bf16", "context": 262144, "quantization": "bf16", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09999999999999999, 
"completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 93.70591114566238, "uptime_1d": 89.619085734929}, {"provider": "Novita", "tag": "novita/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.58, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.78885946847113, "uptime_1d": 98.93488888843753}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 262144, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 81.20282736312447, "uptime_1d": 84.51794213955372}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.79015843038506, "uptime_1d": 99.34773523644196}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.14950000000000002, "completion_per_mtok": 0.5980000000000001, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.9717673630717, "uptime_1d": 98.53499176001077}, {"provider": "Together", "tag": "together", "context": 262144, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 50.67750677506775, "uptime_1d": 83.7552917738856}, {"provider": "Friendli", "tag": "friendli", "context": 262144, "quantization": "unknown", "max_completion_tokens": 262144, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.581589958159, "uptime_1d": 98.87134714557135}, {"provider": "AtlasCloud", "tag": 
"atlas-cloud/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.82164090368609, "uptime_1d": 99.41333333333333}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 16384, "prompt_per_mtok": 0.22, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.9446279571535}, {"provider": "Google", "tag": "google-vertex", "context": 262144, "quantization": "unknown", "max_completion_tokens": 16384, "prompt_per_mtok": 0.25, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.93549820470004}, {"provider": "Cerebras", "tag": "cerebras/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": 40960, "prompt_per_mtok": 0.6, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.73564308706673}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 262144, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.071, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.6719810227837, "uptime_1d": 97.27675475972455}}, {"id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free", "name": "Venice: Uncensored (free)", "hf": "cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition", "context": 32768, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "Venice", "tag": "venice/fp16", "context": 32768, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 56.587537091988125}], "cheapest": 
{"provider": "Venice", "tag": "venice/fp16", "context": 32768, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 56.587537091988125}}, {"id": "google/gemma-3n-e2b-it:free", "name": "Google: Gemma 3n 2B (free)", "hf": "google/gemma-3n-E2B-it", "context": 8192, "open_weight": true, "params_total_b": 2.0, "params_active_b": 2.0, "kv_gb_per_1k": 0.008, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 8192, "quantization": "unknown", "max_completion_tokens": 2048, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95373048004626}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 8192, "quantization": "unknown", "max_completion_tokens": 2048, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95373048004626}}, {"id": "baidu/ernie-4.5-vl-424b-a47b", "name": "Baidu: ERNIE 4.5 VL 424B A47B ", "hf": "baidu/ERNIE-4.5-VL-424B-A47B-PT", "context": 123000, "open_weight": true, "params_total_b": 424.0, "params_active_b": 47.0, "kv_gb_per_1k": 6.784, "providers": [{"provider": "Novita", "tag": "novita/fp16", "context": 123000, "quantization": "fp16", "max_completion_tokens": 16000, "prompt_per_mtok": 0.42, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp16", "context": 123000, "quantization": "fp16", "max_completion_tokens": 16000, "prompt_per_mtok": 0.42, "completion_per_mtok": 1.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "baidu/ernie-4.5-300b-a47b", "name": "Baidu: ERNIE 4.5 300B A47B ", "hf": "baidu/ERNIE-4.5-300B-A47B-PT", "context": 123000, 
"open_weight": true, "params_total_b": 300.0, "params_active_b": 47.0, "kv_gb_per_1k": 4.8, "providers": [{"provider": "Novita", "tag": "novita/bf16", "context": 123000, "quantization": "bf16", "max_completion_tokens": 12000, "prompt_per_mtok": 0.28, "completion_per_mtok": 1.1, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/bf16", "context": 123000, "quantization": "bf16", "max_completion_tokens": 12000, "prompt_per_mtok": 0.28, "completion_per_mtok": 1.1, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/mistral-small-3.2-24b-instruct", "name": "Mistral: Mistral Small 3.2 24B", "hf": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "context": 128000, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.59782070331848, "uptime_1d": 98.30798143249059}, {"provider": "Venice", "tag": "venice/fp8", "context": 256000, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09375, "completion_per_mtok": 0.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.6415163614668}, {"provider": "Mistral", "tag": "mistral", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 75.37328490718322, "uptime_1d": 99.06654325303747}, {"provider": "Parasail", "tag": "parasail/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, 
"uptime_30m": 99.95473064735174, "uptime_1d": 99.83462221144899}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.075, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.59782070331848, "uptime_1d": 98.30798143249059}}, {"id": "google/gemma-3n-e4b-it:free", "name": "Google: Gemma 3n 4B (free)", "hf": "google/gemma-3n-E4B-it", "context": 8192, "open_weight": true, "params_total_b": 4.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.016, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 8192, "quantization": "unknown", "max_completion_tokens": 2048, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92182410423453}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 8192, "quantization": "unknown", "max_completion_tokens": 2048, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92182410423453}}, {"id": "google/gemma-3n-e4b-it", "name": "Google: Gemma 3n 4B", "hf": "google/gemma-3n-E4B-it", "context": 32768, "open_weight": true, "params_total_b": 4.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.016, "providers": [{"provider": "Together", "tag": "together", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.12, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98437622060776, "uptime_1d": 99.97245632878446}], "cheapest": {"provider": "Together", "tag": "together", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.12, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98437622060776, "uptime_1d": 99.97245632878446}}, 
{"id": "meta-llama/llama-guard-4-12b", "name": "Meta: Llama Guard 4 12B", "hf": "meta-llama/Llama-Guard-4-12B", "context": 163840, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 163840, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.18, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.83663771915452, "uptime_1d": 99.87231927329199}, {"provider": "Together", "tag": "together", "context": 1048576, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.94244808501915}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 163840, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.18, "completion_per_mtok": 0.18, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.83663771915452, "uptime_1d": 99.87231927329199}}, {"id": "qwen/qwen3-30b-a3b", "name": "Qwen: Qwen3 30B A3B", "hf": "Qwen/Qwen3-30B-A3B", "context": 40960, "open_weight": true, "params_total_b": 30.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.12, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 20000, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 69.76744186046511, "uptime_1d": 83.58137531703115}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.91809991809993, "uptime_1d": 99.24615614270786}, {"provider": "Alibaba", "tag": "alibaba", "context": 
131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.52, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99693350301283}, {"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.55, "throughput_tps": null, "latency_ms": null, "uptime_30m": 85.42074363992172, "uptime_1d": 92.89517470881864}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 20000, "prompt_per_mtok": 0.09, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 69.76744186046511, "uptime_1d": 83.58137531703115}}, {"id": "qwen/qwen3-8b", "name": "Qwen: Qwen3 8B", "hf": "Qwen/Qwen3-8B", "context": 40960, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.66654316413486, "uptime_1d": 98.91310699468241}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.117, "completion_per_mtok": 0.45499999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.7270423084422, "uptime_1d": 99.80121551822462}], "cheapest": {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.66654316413486, "uptime_1d": 98.91310699468241}}, {"id": "qwen/qwen3-14b", "name": "Qwen: 
Qwen3 14B", "hf": "Qwen/Qwen3-14B", "context": 40960, "open_weight": true, "params_total_b": 14.0, "params_active_b": 14.0, "kv_gb_per_1k": 0.056, "providers": [{"provider": "NextBit", "tag": "nextbit/int4", "context": 40960, "quantization": "int4", "max_completion_tokens": 40960, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.3702698018372}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.12, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.4122832794593}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.22749999999999998, "completion_per_mtok": 0.9099999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.9156355455568}], "cheapest": {"provider": "NextBit", "tag": "nextbit/int4", "context": 40960, "quantization": "int4", "max_completion_tokens": 40960, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.3702698018372}}, {"id": "qwen/qwen3-32b", "name": "Qwen: Qwen3 32B", "hf": "Qwen/Qwen3-32B", "context": 40960, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "Chutes", "tag": "chutes/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 40960, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 60.63829787234043, "uptime_1d": 80.71855117417905}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.28, "throughput_tps": null, "latency_ms": null, 
"uptime_30m": 99.99123172362393, "uptime_1d": 99.95532706624392}, {"provider": "Nebius", "tag": "nebius/base", "context": 40960, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.95456610631531, "uptime_1d": 99.9501359389284}, {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.10400000000000001, "completion_per_mtok": 0.41600000000000004, "throughput_tps": null, "latency_ms": null, "uptime_30m": 93.11897446763429, "uptime_1d": 94.96507040214283}, {"provider": "Novita", "tag": "novita/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 20000, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 82.76566757493188, "uptime_1d": 85.58817107028106}, {"provider": "SiliconFlow", "tag": "siliconflow/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.5700000000000001, "throughput_tps": null, "latency_ms": null, "uptime_30m": 71.65699395949308, "uptime_1d": 61.411808938950074}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 40960, "prompt_per_mtok": 0.29, "completion_per_mtok": 0.59, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.64772042343772}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.84189723320158, "uptime_1d": 99.92557216398933}], "cheapest": {"provider": "Chutes", "tag": "chutes/fp8", "context": 40960, "quantization": "fp8", "max_completion_tokens": 40960, 
"prompt_per_mtok": 0.08, "completion_per_mtok": 0.24, "throughput_tps": null, "latency_ms": null, "uptime_30m": 60.63829787234043, "uptime_1d": 80.71855117417905}}, {"id": "qwen/qwen3-235b-a22b", "name": "Qwen: Qwen3 235B A22B", "hf": "Qwen/Qwen3-235B-A22B", "context": 131072, "open_weight": true, "params_total_b": 235.0, "params_active_b": 22.0, "kv_gb_per_1k": 0.9400000000000001, "providers": [{"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.45499999999999996, "completion_per_mtok": 1.8199999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Alibaba", "tag": "alibaba", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.45499999999999996, "completion_per_mtok": 1.8199999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "alfredpros/codellama-7b-instruct-solidity", "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", "hf": "AlfredPros/CodeLlama-7b-Instruct-Solidity", "context": 4096, "open_weight": true, "params_total_b": 7.0, "params_active_b": 7.0, "kv_gb_per_1k": 0.112, "providers": [{"provider": "Featherless", "tag": "featherless", "context": 4096, "quantization": "unknown", "max_completion_tokens": 4096, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], "cheapest": {"provider": "Featherless", "tag": "featherless", "context": 4096, "quantization": "unknown", "max_completion_tokens": 4096, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "mistralai/mistral-small-3.1-24b-instruct", "name": "Mistral: Mistral Small 3.1 24B", "hf": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", "context": 128000, 
"open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 128000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "google/gemma-3-4b-it:free", "name": "Google: Gemma 3 4B (free)", "hf": "google/gemma-3-4b-it", "context": 32768, "open_weight": true, "params_total_b": 4.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.016, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 32768, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.94678020223523}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 32768, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.94678020223523}}, {"id": "google/gemma-3-4b-it", "name": "Google: Gemma 3 4B", "hf": "google/gemma-3-4b-it", "context": 131072, "open_weight": true, "params_total_b": 4.0, "params_active_b": 4.0, "kv_gb_per_1k": 0.016, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99791014520014}], "cheapest": {"provider": "DeepInfra", "tag": 
"deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99791014520014}}, {"id": "google/gemma-3-12b-it:free", "name": "Google: Gemma 3 12B (free)", "hf": "google/gemma-3-12b-it", "context": 32768, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.048, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 32768, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.47089947089947, "uptime_1d": 99.71797884841364}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 32768, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.47089947089947, "uptime_1d": 99.71797884841364}}, {"id": "google/gemma-3-12b-it", "name": "Google: Gemma 3 12B", "hf": "google/gemma-3-12b-it", "context": 131072, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.048, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.13, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99274934485152, "uptime_1d": 99.90805156520699}, {"provider": "SambaNova", "tag": "sambanova", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.22, "completion_per_mtok": 0.59, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.12514263978699, "uptime_1d": 99.66111412570517}, {"provider": "Cloudflare", "tag": "cloudflare", "context": 80000, "quantization": "unknown", 
"max_completion_tokens": null, "prompt_per_mtok": 0.35, "completion_per_mtok": 0.56, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.19082939986514, "uptime_1d": 99.87338893884427}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.13, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.99274934485152, "uptime_1d": 99.90805156520699}}, {"id": "google/gemma-3-27b-it:free", "name": "Google: Gemma 3 27B (free)", "hf": "google/gemma-3-27b-it", "context": 131072, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.93304767005891}], "cheapest": {"provider": "Google AI Studio", "tag": "google-ai-studio", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.93304767005891}}, {"id": "google/gemma-3-27b-it", "name": "Google: Gemma 3 27B", "hf": "google/gemma-3-27b-it", "context": 131072, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.76609105180533, "uptime_1d": 96.21278190365878}, {"provider": "Novita", "tag": "novita/bf16", "context": 98304, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.119, "completion_per_mtok": 
0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 66.57407407407408, "uptime_1d": 84.05480625133805}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 110000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.63198867657466, "uptime_1d": 99.92719063073233}, {"provider": "Phala", "tag": "phala", "context": 53920, "quantization": "unknown", "max_completion_tokens": 53920, "prompt_per_mtok": 0.11, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 90.64837905236908, "uptime_1d": 95.71753033291495}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.44999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98462366418083, "uptime_1d": 99.80558315445853}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.08, "completion_per_mtok": 0.16, "throughput_tps": null, "latency_ms": null, "uptime_30m": 96.76609105180533, "uptime_1d": 96.21278190365878}}, {"id": "thedrummer/skyfall-36b-v2", "name": "TheDrummer: Skyfall 36B V2", "hf": "TheDrummer/Skyfall-36B-v2", "context": 32768, "open_weight": true, "params_total_b": 36.0, "params_active_b": 36.0, "kv_gb_per_1k": 0.5760000000000001, "providers": [{"provider": "Parasail", "tag": "parasail/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.55, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Parasail", "tag": "parasail/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 
0.55, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "meta-llama/llama-guard-3-8b", "name": "Llama Guard 3 8B", "hf": "meta-llama/Llama-Guard-3-8B", "context": 131072, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.48, "completion_per_mtok": 0.03, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 131072, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.48, "completion_per_mtok": 0.03, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": null}}, {"id": "aion-labs/aion-rp-llama-3.1-8b", "name": "AionLabs: Aion-RP 1.0 (8B)", "hf": "", "context": 32768, "open_weight": false, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "AionLabs", "tag": "aion-labs", "context": 32768, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.5999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "AionLabs", "tag": "aion-labs", "context": 32768, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.5999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "qwen/qwen2.5-vl-72b-instruct", "name": "Qwen: Qwen2.5 VL 72B Instruct", "hf": "Qwen/Qwen2.5-VL-72B-Instruct", "context": 32000, "open_weight": true, "params_total_b": 72.0, "params_active_b": 72.0, "kv_gb_per_1k": 0.28800000000000003, "providers": [{"provider": "Nebius", "tag": 
"nebius/fp8", "context": 32000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.25, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.45570003023889, "uptime_1d": 99.33195414188431}, {"provider": "Novita", "tag": "novita/bf16", "context": 32768, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.76095617529882, "uptime_1d": 99.64309865504546}, {"provider": "Parasail", "tag": "parasail/fp8", "context": 128000, "quantization": "fp8", "max_completion_tokens": 128000, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.0856102003643, "uptime_1d": 98.58166661669016}], "cheapest": {"provider": "Nebius", "tag": "nebius/fp8", "context": 32000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.25, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.45570003023889, "uptime_1d": 99.33195414188431}}, {"id": "mistralai/mistral-small-24b-instruct-2501", "name": "Mistral: Mistral Small 3", "hf": "mistralai/Mistral-Small-24B-Instruct-2501", "context": 32768, "open_weight": true, "params_total_b": 24.0, "params_active_b": 24.0, "kv_gb_per_1k": 0.096, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99961400244722}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 
99.99961400244722}}, {"id": "deepseek/deepseek-r1-distill-qwen-32b", "name": "DeepSeek: R1 Distill Qwen 32B", "hf": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "context": 32768, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.29, "completion_per_mtok": 0.29, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.29, "completion_per_mtok": 0.29, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "deepseek/deepseek-r1-distill-llama-70b", "name": "DeepSeek: R1 Distill Llama 70B", "hf": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.7, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.14893617021276, "uptime_1d": 99.67477851295278}, {"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.73049857763138}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.7, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.14893617021276, "uptime_1d": 99.67477851295278}}, {"id": 
"sao10k/l3.1-70b-hanami-x1", "name": "Sao10K: Llama 3.1 70B Hanami x1", "hf": "Sao10K/L3.1-70B-Hanami-x1", "context": 16000, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "Infermatic", "tag": "infermatic/bf16", "context": 16000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 3.0, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Infermatic", "tag": "infermatic/bf16", "context": 16000, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 3.0, "completion_per_mtok": 3.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "sao10k/l3.3-euryale-70b", "name": "Sao10K: Llama 3.3 Euryale 70B", "hf": "Sao10K/L3.3-70B-Euryale-v2.3", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "NextBit", "tag": "nextbit/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.65, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.16342412451361, "uptime_1d": 87.91520772215388}, {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.85, "completion_per_mtok": 0.85, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.92634602636812}], "cheapest": {"provider": "NextBit", "tag": "nextbit/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.65, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.16342412451361, "uptime_1d": 87.91520772215388}}, {"id": "meta-llama/llama-3.3-70b-instruct:free", "name": "Meta: Llama 3.3 70B Instruct (free)", "hf": 
"meta-llama/Llama-3.3-70B-Instruct", "context": 65536, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "Venice", "tag": "venice/fp8", "context": 65536, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 67.36545682102629}], "cheapest": {"provider": "Venice", "tag": "venice/fp8", "context": 65536, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 67.36545682102629}}, {"id": "meta-llama/llama-3.3-70b-instruct", "name": "Meta: Llama 3.3 70B Instruct", "hf": "meta-llama/Llama-3.3-70B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.32, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.37143559488692, "uptime_1d": 98.01275607139937}, {"provider": "Inceptron", "tag": "inceptron/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 131072, "prompt_per_mtok": 0.12, "completion_per_mtok": 0.38, "throughput_tps": null, "latency_ms": null, "uptime_30m": 97.32851985559566, "uptime_1d": 96.96112735464698}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.13, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.67809431836471, "uptime_1d": 99.59633005674003}, {"provider": "AkashML", "tag": "akashml/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 128000, "prompt_per_mtok": 0.13, 
"completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.36109618484686, "uptime_1d": 98.92760663776077}, {"provider": "Novita", "tag": "novita/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 120000, "prompt_per_mtok": 0.135, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.47046843177189, "uptime_1d": 99.62453463224054}, {"provider": "Parasail", "tag": "parasail/int8", "context": 131072, "quantization": "int8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.22, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.22170301142263, "uptime_1d": 99.08461520901885}, {"provider": "Friendli", "tag": "friendli", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.6, "completion_per_mtok": 0.6, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.97261024376883}, {"provider": "SambaNova", "tag": "sambanova-turbo", "context": 16000, "quantization": "bf16", "max_completion_tokens": 3072, "prompt_per_mtok": 0.44999999999999996, "completion_per_mtok": 0.8999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 83.9226150767178, "uptime_1d": 93.05003861855897}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.59, "completion_per_mtok": 0.7899999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.94692144373673, "uptime_1d": 99.96567409485316}, {"provider": "WandB", "tag": "wandb/fp16", "context": 128000, "quantization": "fp16", "max_completion_tokens": 128000, "prompt_per_mtok": 0.71, "completion_per_mtok": 0.71, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99297890864156}, {"provider": "Google", "tag": "google-vertex", "context": 128000, "quantization": "unknown", 
"max_completion_tokens": null, "prompt_per_mtok": 0.72, "completion_per_mtok": 0.72, "throughput_tps": null, "latency_ms": null, "uptime_30m": 90.73569482288828, "uptime_1d": 92.14145383104125}, {"provider": "Google", "tag": "google-vertex", "context": 128000, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.72, "completion_per_mtok": 0.72, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.90590809628009, "uptime_1d": 99.55547148396859}, {"provider": "Together", "tag": "together/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 0.88, "completion_per_mtok": 0.88, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.37888198757764, "uptime_1d": 98.27067669172932}, {"provider": "SambaNova", "tag": "sambanova/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 3072, "prompt_per_mtok": 0.6, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 72.63157894736842, "uptime_1d": 81.90545004128819}, {"provider": "Cloudflare", "tag": "cloudflare/fp8", "context": 24000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.29, "completion_per_mtok": 2.25, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.33531510107015, "uptime_1d": 96.5827169740661}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.32, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.37143559488692, "uptime_1d": 98.01275607139937}}, {"id": "qwen/qwen-2.5-coder-32b-instruct", "name": "Qwen2.5 Coder 32B Instruct", "hf": "Qwen/Qwen2.5-Coder-32B-Instruct", "context": 32768, "open_weight": true, "params_total_b": 32.0, "params_active_b": 32.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 32768, "quantization": 
"unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.66, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 32768, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.66, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "thedrummer/unslopnemo-12b", "name": "TheDrummer: UnslopNemo 12B", "hf": "TheDrummer/UnslopNemo-12B-v4.1", "context": 32768, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.75133474731223}], "cheapest": {"provider": "NextBit", "tag": "nextbit/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 32768, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.75133474731223}}, {"id": "anthracite-org/magnum-v4-72b", "name": "Magnum v4 72B", "hf": "anthracite-org/magnum-v4-72b", "context": 16384, "open_weight": true, "params_total_b": 72.0, "params_active_b": 72.0, "kv_gb_per_1k": 1.1520000000000001, "providers": [{"provider": "Mancer 2", "tag": "mancer/fp8", "context": 16384, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 3.0, "completion_per_mtok": 5.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Mancer 2", "tag": "mancer/fp8", "context": 16384, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 3.0, "completion_per_mtok": 5.0, "throughput_tps": 
null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "qwen/qwen-2.5-7b-instruct", "name": "Qwen: Qwen2.5 7B Instruct", "hf": "Qwen/Qwen2.5-7B-Instruct", "context": 32768, "open_weight": true, "params_total_b": 7.0, "params_active_b": 7.0, "kv_gb_per_1k": 0.028, "providers": [{"provider": "Phala", "tag": "phala", "context": 32768, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 20.853310087655576, "uptime_1d": 95.99651272168308}, {"provider": "AtlasCloud", "tag": "atlas-cloud/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.75977287617384, "uptime_1d": 99.30429570217598}, {"provider": "Together", "tag": "together/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.70535048117658, "uptime_1d": 99.7318058406728}], "cheapest": {"provider": "Phala", "tag": "phala", "context": 32768, "quantization": "unknown", "max_completion_tokens": 32768, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 20.853310087655576, "uptime_1d": 95.99651272168308}}, {"id": "nvidia/llama-3.1-nemotron-70b-instruct", "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", "hf": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 1.2, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, 
"uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 1.2, "completion_per_mtok": 1.2, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "thedrummer/rocinante-12b", "name": "TheDrummer: Rocinante 12B", "hf": "TheDrummer/Rocinante-12B-v1.1", "context": 32768, "open_weight": true, "params_total_b": 12.0, "params_active_b": 12.0, "kv_gb_per_1k": 0.192, "providers": [{"provider": "NextBit", "tag": "nextbit/bf16", "context": 32768, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.16999999999999998, "completion_per_mtok": 0.43, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.7627326651667}, {"provider": "Infermatic", "tag": "infermatic/bf16", "context": 32768, "quantization": "bf16", "max_completion_tokens": null, "prompt_per_mtok": 0.25, "completion_per_mtok": 0.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.52830188679245, "uptime_1d": 99.98148319600037}], "cheapest": {"provider": "NextBit", "tag": "nextbit/bf16", "context": 32768, "quantization": "bf16", "max_completion_tokens": 32768, "prompt_per_mtok": 0.16999999999999998, "completion_per_mtok": 0.43, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.7627326651667}}, {"id": "meta-llama/llama-3.2-3b-instruct:free", "name": "Meta: Llama 3.2 3B Instruct (free)", "hf": "meta-llama/Llama-3.2-3B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 3.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.012, "providers": [{"provider": "Venice", "tag": "venice/fp16", "context": 131072, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 55.24475524475524}], "cheapest": {"provider": "Venice", "tag": "venice/fp16", "context": 
131072, "quantization": "fp16", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 55.24475524475524}}, {"id": "meta-llama/llama-3.2-3b-instruct", "name": "Meta: Llama 3.2 3B Instruct", "hf": "meta-llama/Llama-3.2-3B-Instruct", "context": 80000, "open_weight": true, "params_total_b": 3.0, "params_active_b": 3.0, "kv_gb_per_1k": 0.012, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 80000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.051, "completion_per_mtok": 0.33999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99869485135171}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 80000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.051, "completion_per_mtok": 0.33999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99869485135171}}, {"id": "meta-llama/llama-3.2-1b-instruct", "name": "Meta: Llama 3.2 1B Instruct", "hf": "meta-llama/Llama-3.2-1B-Instruct", "context": 60000, "open_weight": true, "params_total_b": 1.0, "params_active_b": 1.0, "kv_gb_per_1k": 0.004, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 60000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.027, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 60000, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.027, "completion_per_mtok": 0.19999999999999998, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "meta-llama/llama-3.2-11b-vision-instruct", "name": "Meta: Llama 3.2 11B Vision Instruct", "hf": 
"meta-llama/Llama-3.2-11B-Vision-Instruct", "context": 131072, "open_weight": true, "params_total_b": 11.0, "params_active_b": 11.0, "kv_gb_per_1k": 0.044, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.245, "completion_per_mtok": 0.245, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.245, "completion_per_mtok": 0.245, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "qwen/qwen-2.5-72b-instruct", "name": "Qwen2.5 72B Instruct", "hf": "Qwen/Qwen2.5-72B-Instruct", "context": 32768, "open_weight": true, "params_total_b": 72.0, "params_active_b": 72.0, "kv_gb_per_1k": 0.28800000000000003, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.36, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.971870604782, "uptime_1d": 99.71919789705181}, {"provider": "Novita", "tag": "novita/bf16", "context": 32000, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.38, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 99.74554707379136}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.36, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.971870604782, "uptime_1d": 99.71919789705181}}, {"id": "sao10k/l3.1-euryale-70b", "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "hf": "Sao10K/L3.1-70B-Euryale-v2.2", "context": 131072, 
"open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.85, "completion_per_mtok": 0.85, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95469646632438}, {"provider": "Novita", "tag": "novita/fp8", "context": 8192, "quantization": "fp8", "max_completion_tokens": 8192, "prompt_per_mtok": 1.48, "completion_per_mtok": 1.48, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.85, "completion_per_mtok": 0.85, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.95469646632438}}, {"id": "nousresearch/hermes-3-llama-3.1-70b", "name": "Nous: Hermes 3 70B Instruct", "hf": "NousResearch/Hermes-3-Llama-3.1-70B", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.3, "completion_per_mtok": 0.3, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "nousresearch/hermes-3-llama-3.1-405b:free", "name": "Nous: Hermes 3 405B Instruct (free)", "hf": "NousResearch/Hermes-3-Llama-3.1-405B", "context": 131072, "open_weight": true, "params_total_b": 405.0, "params_active_b": 405.0, "kv_gb_per_1k": 1.62, "providers": [{"provider": "Venice", 
"tag": "venice/beta", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 50.265708033760546}], "cheapest": {"provider": "Venice", "tag": "venice/beta", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.0, "completion_per_mtok": 0.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 50.265708033760546}}, {"id": "nousresearch/hermes-3-llama-3.1-405b", "name": "Nous: Hermes 3 405B Instruct", "hf": "NousResearch/Hermes-3-Llama-3.1-405B", "context": 131072, "open_weight": true, "params_total_b": 405.0, "params_active_b": 405.0, "kv_gb_per_1k": 1.62, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 1.0, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 1.0, "completion_per_mtok": 1.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "sao10k/l3-lunaris-8b", "name": "Sao10K: Llama 3 8B Lunaris", "hf": "Sao10K/L3-8B-Lunaris-v1", "context": 8192, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.128, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 8192, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96606142881384, "uptime_1d": 99.96301856973528}, {"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 
0.049999999999999996, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.10600255427842, "uptime_1d": 94.32528743360452}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 8192, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.96606142881384, "uptime_1d": 99.96301856973528}}, {"id": "meta-llama/llama-3.1-8b-instruct", "name": "Meta: Llama 3.1 8B Instruct", "hf": "meta-llama/Meta-Llama-3.1-8B-Instruct", "context": 16384, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 16384, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.02, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.69707401032701, "uptime_1d": 97.99223527344239}, {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.02, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.5028961701392, "uptime_1d": 99.6032352450478}, {"provider": "Nebius", "tag": "nebius/fp8", "context": 131072, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.02, "completion_per_mtok": 0.06, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.64943725454019, "uptime_1d": 99.51845375057525}, {"provider": "Groq", "tag": "groq", "context": 131072, "quantization": "unknown", "max_completion_tokens": 131072, "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.08, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.98667732480682, "uptime_1d": 99.97299342866116}, {"provider": "Friendli", "tag": "friendli", "context": 131072, 
"quantization": "unknown", "max_completion_tokens": 8000, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98796820066211}, {"provider": "Cerebras", "tag": "cerebras/fp16", "context": 32768, "quantization": "fp16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 15.491452991452991, "uptime_1d": 14.93305029392554}, {"provider": "Cloudflare", "tag": "cloudflare/fp8", "context": 32000, "quantization": "fp8", "max_completion_tokens": null, "prompt_per_mtok": 0.15, "completion_per_mtok": 0.29, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.03948772678763, "uptime_1d": 99.90947593029647}, {"provider": "WandB", "tag": "wandb/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": 128000, "prompt_per_mtok": 0.22, "completion_per_mtok": 0.22, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 16384, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.02, "completion_per_mtok": 0.049999999999999996, "throughput_tps": null, "latency_ms": null, "uptime_30m": 95.69707401032701, "uptime_1d": 97.99223527344239}}, {"id": "meta-llama/llama-3.1-70b-instruct", "name": "Meta: Llama 3.1 70B Instruct", "hf": "meta-llama/Meta-Llama-3.1-70B-Instruct", "context": 131072, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.15870125929298, "uptime_1d": 99.71119426591363}, {"provider": 
"DeepInfra", "tag": "deepinfra/base", "context": 131072, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 98.27385570209465, "uptime_1d": 99.54320793325988}, {"provider": "Amazon Bedrock", "tag": "amazon-bedrock", "context": 131072, "quantization": "unknown", "max_completion_tokens": 8192, "prompt_per_mtok": 0.72, "completion_per_mtok": 0.72, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.78577549271637, "uptime_1d": 97.3890742094681}, {"provider": "WandB", "tag": "wandb/bf16", "context": 128000, "quantization": "bf16", "max_completion_tokens": 128000, "prompt_per_mtok": 0.7999999999999999, "completion_per_mtok": 0.7999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.98133569958702}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/turbo", "context": 131072, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 94.15870125929298, "uptime_1d": 99.71119426591363}}, {"id": "google/gemma-2-27b-it", "name": "Google: Gemma 2 27B", "hf": "google/gemma-2-27b-it", "context": 8192, "open_weight": true, "params_total_b": 27.0, "params_active_b": 27.0, "kv_gb_per_1k": 0.108, "providers": [{"provider": "NextBit", "tag": "nextbit/int4", "context": 8192, "quantization": "int4", "max_completion_tokens": 2048, "prompt_per_mtok": 0.65, "completion_per_mtok": 0.65, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "NextBit", "tag": "nextbit/int4", "context": 8192, "quantization": "int4", "max_completion_tokens": 2048, "prompt_per_mtok": 0.65, "completion_per_mtok": 0.65, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": 
"sao10k/l3-euryale-70b", "name": "Sao10k: Llama 3 Euryale 70B v2.1", "hf": "Sao10K/L3-70B-Euryale-v2.1", "context": 8192, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 1.12, "providers": [{"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 1.48, "completion_per_mtok": 1.48, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 1.48, "completion_per_mtok": 1.48, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "nousresearch/hermes-2-pro-llama-3-8b", "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", "hf": "NousResearch/Hermes-2-Pro-Llama-3-8B", "context": 8192, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "Novita", "tag": "novita/fp16", "context": 8192, "quantization": "fp16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.14, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp16", "context": 8192, "quantization": "fp16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.14, "completion_per_mtok": 0.14, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "meta-llama/llama-3-8b-instruct", "name": "Meta: Llama 3 8B Instruct", "hf": "meta-llama/Meta-Llama-3-8B-Instruct", "context": 8192, "open_weight": true, "params_total_b": 8.0, "params_active_b": 8.0, "kv_gb_per_1k": 0.032, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.04, "throughput_tps": null, "latency_ms": 
null, "uptime_30m": 100, "uptime_1d": 99.99594542542624}, {"provider": "Novita", "tag": "novita/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 8192, "prompt_per_mtok": 0.04, "completion_per_mtok": 0.04, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}, {"provider": "Together", "tag": "together/int4", "context": 8192, "quantization": "int4", "max_completion_tokens": null, "prompt_per_mtok": 0.09999999999999999, "completion_per_mtok": 0.09999999999999999, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.16107382550335, "uptime_1d": 91.95635282591805}, {"provider": "Cloudflare", "tag": "cloudflare", "context": 7968, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.28, "completion_per_mtok": 0.83, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/bf16", "context": 8192, "quantization": "bf16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.03, "completion_per_mtok": 0.04, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99594542542624}}, {"id": "meta-llama/llama-3-70b-instruct", "name": "Meta: Llama 3 70B Instruct", "hf": "meta-llama/Meta-Llama-3-70B-Instruct", "context": 8192, "open_weight": true, "params_total_b": 70.0, "params_active_b": 70.0, "kv_gb_per_1k": 0.28, "providers": [{"provider": "Novita", "tag": "novita/fp8", "context": 8192, "quantization": "fp8", "max_completion_tokens": 8000, "prompt_per_mtok": 0.51, "completion_per_mtok": 0.74, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/fp8", "context": 8192, "quantization": "fp8", "max_completion_tokens": 8000, "prompt_per_mtok": 0.51, "completion_per_mtok": 0.74, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/mixtral-8x22b-instruct", "name": "Mistral: 
Mixtral 8x22B Instruct", "hf": "mistralai/Mixtral-8x22B-Instruct-v0.1", "context": 65536, "open_weight": true, "params_total_b": 147.84, "params_active_b": 44.0, "kv_gb_per_1k": 0.59136, "providers": [{"provider": "Mistral", "tag": "mistral", "context": 65536, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 2.0, "completion_per_mtok": 6.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Mistral", "tag": "mistral", "context": 65536, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 2.0, "completion_per_mtok": 6.0, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "microsoft/wizardlm-2-8x22b", "name": "WizardLM-2 8x22B", "hf": "microsoft/WizardLM-2-8x22B", "context": 65535, "open_weight": true, "params_total_b": 147.84, "params_active_b": 44.0, "kv_gb_per_1k": 2.36544, "providers": [{"provider": "Novita", "tag": "novita/bf16", "context": 65535, "quantization": "bf16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.62, "completion_per_mtok": 0.62, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "Novita", "tag": "novita/bf16", "context": 65535, "quantization": "bf16", "max_completion_tokens": 8000, "prompt_per_mtok": 0.62, "completion_per_mtok": 0.62, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}}, {"id": "mistralai/mixtral-8x7b-instruct", "name": "Mistral: Mixtral 8x7B Instruct", "hf": "mistralai/Mixtral-8x7B-Instruct-v0.1", "context": 32768, "open_weight": true, "params_total_b": 47.04, "params_active_b": 14.0, "kv_gb_per_1k": 0.18816, "providers": [{"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.54, "completion_per_mtok": 0.54, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 
99.99671538840532}], "cheapest": {"provider": "DeepInfra", "tag": "deepinfra/fp8", "context": 32768, "quantization": "fp8", "max_completion_tokens": 16384, "prompt_per_mtok": 0.54, "completion_per_mtok": 0.54, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.99671538840532}}, {"id": "alpindale/goliath-120b", "name": "Goliath 120B", "hf": "alpindale/goliath-120b", "context": 6144, "open_weight": true, "params_total_b": 120.0, "params_active_b": 120.0, "kv_gb_per_1k": 1.92, "providers": [{"provider": "Mancer 2", "tag": "mancer/int4", "context": 6144, "quantization": "int4", "max_completion_tokens": 1024, "prompt_per_mtok": 3.75, "completion_per_mtok": 7.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}], "cheapest": {"provider": "Mancer 2", "tag": "mancer/int4", "context": 6144, "quantization": "int4", "max_completion_tokens": 1024, "prompt_per_mtok": 3.75, "completion_per_mtok": 7.5, "throughput_tps": null, "latency_ms": null, "uptime_30m": null, "uptime_1d": 100}}, {"id": "mistralai/mistral-7b-instruct-v0.1", "name": "Mistral: Mistral 7B Instruct v0.1", "hf": "mistralai/Mistral-7B-Instruct-v0.1", "context": 2824, "open_weight": true, "params_total_b": 7.0, "params_active_b": 7.0, "kv_gb_per_1k": 0.028, "providers": [{"provider": "Cloudflare", "tag": "cloudflare", "context": 2824, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.11, "completion_per_mtok": 0.19, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.35174854768592}], "cheapest": {"provider": "Cloudflare", "tag": "cloudflare", "context": 2824, "quantization": "unknown", "max_completion_tokens": null, "prompt_per_mtok": 0.11, "completion_per_mtok": 0.19, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 99.35174854768592}}, {"id": "undi95/remm-slerp-l2-13b", "name": "ReMM SLERP 13B", "hf": "Undi95/ReMM-SLERP-L2-13B", "context": 6144, "open_weight": true, 
"params_total_b": 13.0, "params_active_b": 13.0, "kv_gb_per_1k": 0.20800000000000002, "providers": [{"provider": "NextBit", "tag": "nextbit/bf16", "context": 6144, "quantization": "bf16", "max_completion_tokens": 4096, "prompt_per_mtok": 0.44999999999999996, "completion_per_mtok": 0.65, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 96.66506717850288}, {"provider": "Mancer 2", "tag": "mancer/fp8", "context": 6144, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 0.5, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 100}], "cheapest": {"provider": "NextBit", "tag": "nextbit/bf16", "context": 6144, "quantization": "bf16", "max_completion_tokens": 4096, "prompt_per_mtok": 0.44999999999999996, "completion_per_mtok": 0.65, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 96.66506717850288}}, {"id": "gryphe/mythomax-l2-13b", "name": "MythoMax 13B", "hf": "Gryphe/MythoMax-L2-13b", "context": 4096, "open_weight": true, "params_total_b": 13.0, "params_active_b": 13.0, "kv_gb_per_1k": 0.20800000000000002, "providers": [{"provider": "NextBit", "tag": "nextbit/int4", "context": 4096, "quantization": "int4", "max_completion_tokens": 4096, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.06, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.46881135448771}, {"provider": "DeepInfra", "tag": "deepinfra/fp16", "context": 4096, "quantization": "fp16", "max_completion_tokens": 16384, "prompt_per_mtok": 0.39999999999999997, "completion_per_mtok": 0.39999999999999997, "throughput_tps": null, "latency_ms": null, "uptime_30m": 99.91935483870968, "uptime_1d": 99.9922570654278}, {"provider": "Mancer 2", "tag": "mancer/fp8", "context": 8192, "quantization": "fp8", "max_completion_tokens": 2048, "prompt_per_mtok": 0.5, "completion_per_mtok": 0.75, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 
99.97910718016576}], "cheapest": {"provider": "NextBit", "tag": "nextbit/int4", "context": 4096, "quantization": "int4", "max_completion_tokens": 4096, "prompt_per_mtok": 0.06, "completion_per_mtok": 0.06, "throughput_tps": null, "latency_ms": null, "uptime_30m": 100, "uptime_1d": 98.46881135448771}}]; | |
| const GPUS = [{"name": "RTX 3090", "tier": "consumer", "vram": 24, "bandwidth": 936, "tdp": 350, "msrp": 1499, "street": 700, "released": 2020, "fp8": false, "fp4": false}, {"name": "RTX 4090", "tier": "consumer", "vram": 24, "bandwidth": 1008, "tdp": 450, "msrp": 1599, "street": 1900, "released": 2022, "fp8": true, "fp4": false}, {"name": "RTX 5070 Ti", "tier": "consumer", "vram": 16, "bandwidth": 896, "tdp": 300, "msrp": 749, "street": 900, "released": 2025, "anchor": true, "fp8": true, "fp4": true}, {"name": "RTX 5080", "tier": "consumer", "vram": 16, "bandwidth": 960, "tdp": 360, "msrp": 999, "street": 1200, "released": 2025, "fp8": true, "fp4": true}, {"name": "RTX 5090", "tier": "consumer", "vram": 32, "bandwidth": 1792, "tdp": 575, "msrp": 1999, "street": 2800, "released": 2025, "fp8": true, "fp4": true}, {"name": "RTX Pro 6000 Blackwell", "tier": "workstation", "vram": 96, "bandwidth": 1792, "tdp": 600, "msrp": 8500, "street": 8800, "released": 2025, "fp8": true, "fp4": true}, {"name": "L4", "tier": "datacenter", "vram": 24, "bandwidth": 300, "tdp": 72, "msrp": 2500, "street": 2400, "released": 2023, "fp8": true, "fp4": false}, {"name": "L40S", "tier": "datacenter", "vram": 48, "bandwidth": 864, "tdp": 350, "msrp": 8000, "street": 7500, "released": 2023, "fp8": true, "fp4": false}, {"name": "A100 40GB", "tier": "datacenter", "vram": 40, "bandwidth": 1555, "tdp": 400, "msrp": 10000, "street": 6800, "released": 2020, "fp8": false, "fp4": false}, {"name": "A100 80GB", "tier": "datacenter", "vram": 80, "bandwidth": 2039, "tdp": 400, "msrp": 15000, "street": 11500, "released": 2021, "fp8": false, "fp4": false}, {"name": "H100 80GB SXM", "tier": "datacenter", "vram": 80, "bandwidth": 3350, "tdp": 700, "msrp": 30000, "street": 24000, "released": 2022, "fp8": true, "fp4": false}, {"name": "H200 141GB", "tier": "datacenter", "vram": 141, "bandwidth": 4800, "tdp": 700, "msrp": 32000, "street": 30000, "released": 2024, "fp8": true, "fp4": false}, {"name": "B200 
192GB", "tier": "datacenter", "vram": 192, "bandwidth": 8000, "tdp": 1000, "msrp": 40000, "street": 38000, "released": 2025, "fp8": true, "fp4": true}]; | |
| const FRONTIER = [{"label": "OpenAI GPT-5.x", "model": "OpenAI: GPT-5 Nano", "prompt_per_mtok": 0.049999999999999996, "completion_per_mtok": 0.39999999999999997}, {"label": "Claude Opus", "model": "Anthropic: Claude Opus 4.7", "prompt_per_mtok": 5.0, "completion_per_mtok": 25.0}, {"label": "Claude Sonnet", "model": "Anthropic: Claude Sonnet 4.6", "prompt_per_mtok": 3.0, "completion_per_mtok": 15.0}, {"label": "Gemini 2.x Pro", "model": "Google: Gemini 2.5 Pro", "prompt_per_mtok": 1.25, "completion_per_mtok": 10.0}, {"label": "Grok", "model": "xAI: Grok 4.1 Fast", "prompt_per_mtok": 0.19999999999999998, "completion_per_mtok": 0.5}]; | |
| const ENGINES = {"engines": [{"name": "llama.cpp", "single_stream": 1.0, "batched": 1.0, "hardware": "All (NVIDIA, AMD, Apple, CPU)", "use_case": "Single-user; broadest hardware support; reference baseline.", "notes": "User's anchor. Sequential queueing \u2014 does not scale with concurrency.", "source": "jan.ai TRT-LLM bench: 100 t/s on 4090"}, {"name": "Ollama", "single_stream": 0.95, "batched": 0.3, "hardware": "All", "use_case": "Convenience wrapper around llama.cpp.", "notes": "Same single-stream as llama.cpp; collapses under concurrent load (no continuous batching).", "source": "sitepoint Ollama vs vLLM 2026"}, {"name": "ExLlamaV2", "single_stream": 1.3, "batched": 1.5, "hardware": "NVIDIA only", "use_case": "Single-user enthusiast; consistent single-stream leader on consumer NVIDIA.", "notes": "EXL2 mixed-bpw quantization. 7B 139 t/s on 4090 (~1.3x llama.cpp).", "source": "turboderp/exllama #16"}, {"name": "ExLlamaV3", "single_stream": 1.35, "batched": 1.6, "hardware": "NVIDIA only", "use_case": "EXL2 successor; better low-bpw quality.", "notes": "Estimated; comparable to ExLlamaV2 on speed.", "source": "turboderp-org/exllamav3"}, {"name": "vLLM", "single_stream": 1.05, "batched": 8.0, "hardware": "NVIDIA, AMD ROCm, Gaudi, TPU", "use_case": "Multi-user serving \u2014 de facto open-source production engine.", "notes": "PagedAttention + continuous batching. ~tied with llama.cpp at batch=1; 35-44x ahead under load.", "source": "Red Hat vLLM vs llama.cpp 2025"}, {"name": "SGLang", "single_stream": 1.1, "batched": 10.0, "hardware": "NVIDIA, AMD ROCm", "use_case": "Serving with prefix caching wins (RAG, multi-turn, agents).", "notes": "RadixAttention. 
~29% throughput edge over vLLM on H100; up to 6.4x on prefix-heavy workloads.", "source": "premai.io 2026 bench"}, {"name": "TensorRT-LLM", "single_stream": 1.7, "batched": 10.0, "hardware": "NVIDIA only (best on Hopper/Blackwell)", "use_case": "Lowest-latency single-user OR highest-throughput serving on NVIDIA.", "notes": "70% faster than llama.cpp on 4090 (170 vs 100 t/s). Compile-time + ops complexity costs.", "source": "jan.ai TRT-LLM bench"}, {"name": "MLC-LLM", "single_stream": 1.2, "batched": 1.5, "hardware": "NVIDIA, AMD, Apple, Vulkan, WebGPU, mobile", "use_case": "Cross-platform single-user; best-in-class for mobile/Web.", "notes": "TVM-compiled. Strong on non-NVIDIA targets. Multiplier estimated.", "source": "MLC project leaderboard"}, {"name": "LMDeploy", "single_stream": 1.4, "batched": 9.0, "hardware": "NVIDIA only", "use_case": "Production serving \u2014 strong for INT4 large models.", "notes": "TurboMind C++ engine. ~28% faster than vLLM on H100. 2.4x INT4 vs FP16.", "source": "premai.io 2026 bench"}, {"name": "Aphrodite", "single_stream": 1.05, "batched": 8.0, "hardware": "NVIDIA, AMD ROCm", "use_case": "vLLM fork with broader quant support (EXL2 + GGUF + AWQ + GPTQ).", "notes": "Tracks vLLM closely; main draw is format compatibility.", "source": "github.com/aphrodite-engine"}, {"name": "TGI (HF)", "single_stream": 1.0, "batched": 6.0, "hardware": "NVIDIA, AMD, Gaudi, Inferentia", "use_case": "HF's serving engine; vLLM competitor.", "notes": "Built-in speculative decoding (Medusa, n-gram). Slightly behind vLLM under load.", "source": "marktechpost 2025-11"}, {"name": "HF Transformers", "single_stream": 0.4, "batched": 0.5, "hardware": "NVIDIA, AMD, CPU", "use_case": "Reference / research only \u2014 not for production.", "notes": "Eager-mode PyTorch, no kernel fusion. ~2-3x slower than llama.cpp at batch=1.", "source": "HF community consensus"}], "speculative": {"default_speedup": 1.15, "best_case": 2.5, "notes": "Highly workload-dependent. 
Code/structured-output with vocab-matched draft: up to 2.5x. MoE on consumer Ampere/Ada: often 0.9-1.0x (slowdown reported). Default 1.15x average."}, "fp8_note": "On Hopper (H100/H200): native FP8 \u2248 2x FP16 throughput, quality \u2248 BF16. On Blackwell (5090/B200): native FP4 \u2248 2x FP8 again. On Ampere/Ada: Q4 weight-only is the speed champion at batch=1 (no FP8/FP4 tensor cores). At batch=1 single-stream is memory-bandwidth-bound, so Q4\u2248FP4\u2248FP8\u2248INT8 within \u00b120% on the same GPU.", "batch_caveat": "API providers run batches of 64-256 with continuous batching. Their per-token unit economics are 5-15x better than your batch=1 local number. Comparing single-user local TPS to API price systematically makes local look worse than it is. Use 'Batched serving' mode if you'll concurrent-serve."}; | |
| const QUANTS = [{"id": "FP16", "label": "FP16", "bpp": 2.0, "format": "native", "engines": ["all"], "quality": "lossless", "needs_fp8": false, "needs_fp4": false}, {"id": "FP8", "label": "FP8", "bpp": 1.0, "format": "native", "engines": ["vLLM", "TensorRT-LLM", "SGLang", "LMDeploy", "TGI (HF)"], "quality": "near-lossless", "needs_fp8": true, "needs_fp4": false}, {"id": "NVFP4", "label": "NVFP4", "bpp": 0.5, "format": "native", "engines": ["TensorRT-LLM", "vLLM"], "quality": "good", "needs_fp8": false, "needs_fp4": true}, {"id": "Q8_0", "label": "Q8_0 (GGUF)", "bpp": 1.0625, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "near-lossless", "needs_fp8": false, "needs_fp4": false}, {"id": "Q6_K", "label": "Q6_K (GGUF)", "bpp": 0.8125, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "near-lossless", "needs_fp8": false, "needs_fp4": false}, {"id": "Q5_K_M", "label": "Q5_K_M (GGUF)", "bpp": 0.6875, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "Q4_K_M", "label": "Q4_K_M (GGUF) \u00b7 default", "bpp": 0.5625, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "Q4_K_S", "label": "Q4_K_S (GGUF)", "bpp": 0.5, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "Q3_K_M", "label": "Q3_K_M (GGUF)", "bpp": 0.4375, "format": "GGUF", "engines": ["llama.cpp", "Ollama"], "quality": "lossy", "needs_fp8": false, "needs_fp4": false}, {"id": "AWQ-INT4", "label": "AWQ INT4", "bpp": 0.5, "format": "AWQ", "engines": ["vLLM", "TGI (HF)", "Aphrodite", "LMDeploy", "SGLang"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "GPTQ-INT4", "label": "GPTQ INT4", "bpp": 0.5, "format": "GPTQ", "engines": ["vLLM", "TGI (HF)", "Aphrodite", "ExLlamaV2", "ExLlamaV3"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": 
"EXL2-4.0", "label": "EXL2 4.0bpw", "bpp": 0.5, "format": "EXL2", "engines": ["ExLlamaV2", "ExLlamaV3", "Aphrodite"], "quality": "good", "needs_fp8": false, "needs_fp4": false}, {"id": "EXL2-5.0", "label": "EXL2 5.0bpw", "bpp": 0.625, "format": "EXL2", "engines": ["ExLlamaV2", "ExLlamaV3", "Aphrodite"], "quality": "near-lossless", "needs_fp8": false, "needs_fp4": false}, {"id": "EXL2-6.0", "label": "EXL2 6.0bpw", "bpp": 0.75, "format": "EXL2", "engines": ["ExLlamaV2", "ExLlamaV3", "Aphrodite"], "quality": "near-lossless", "needs_fp8": false, "needs_fp4": false}]; | |
// Calibration constant consumed by the throughput math elsewhere in this file.
// NOTE(review): presumably the measured efficiency of the "anchor" GPU entry — confirm.
const ANCHOR_EFF = 0.621512;

// Reads a raw string from localStorage.
// Returns null when the key is absent or when storage cannot be accessed at all
// (blocked storage, non-browser environment), so callers can fall back to defaults
// instead of aborting page initialisation.
function readStored(key) {
  try {
    return localStorage.getItem(key) ?? null;
  } catch {
    return null;
  }
}

// Reads and JSON-decodes a stored value.
// Returns `fallback` when the key is missing/empty, the text is not valid JSON,
// or the decoded value is rejected by `isValid`.
function readStoredJson(key, fallback, isValid = () => true) {
  const raw = readStored(key);
  if (raw === null || raw === "") return fallback;
  try {
    const parsed = JSON.parse(raw);
    return isValid(parsed) ? parsed : fallback;
  } catch {
    return fallback;
  }
}

// Reads a stored number; returns `fallback` when the key is missing or the
// stored text does not parse to a finite number.
function readStoredNumber(key, fallback) {
  const parsed = Number.parseFloat(readStored(key) ?? "");
  return Number.isFinite(parsed) ? parsed : fallback;
}

// Mutable UI state. Persisted fields are restored from localStorage; every
// restore is guarded so a corrupt or missing entry yields the default value.
const S = {
  tab: "model",
  modelId: null,
  gpuName: "RTX 5070 Ti",
  mode: "payback",
  quant: "Q4_K_M",
  kvQuant: readStored("kv_quant") || "Q8",
  price: "street", // "street" | "msrp" | "custom"
  engine: "llama.cpp",
  conc: "single",
  // Must be a plain object because it is written back with JSON.stringify;
  // anything else (null, arrays, scalars) is discarded.
  priceOverrides: readStoredJson(
    "gpu_price_overrides",
    {},
    (value) => value !== null && typeof value === "object" && !Array.isArray(value),
  ),
  apiPriceOverride: null,
  usage: readStoredJson("or_usage_v1", null),
  budget: readStoredNumber("budget", 1000),
  includeDC: readStored("include_dc") === "true",
};
// Writes the current budget from state to localStorage under the "budget" key.
function persistBudget() {
  const serialized = String(S.budget);
  localStorage.setItem("budget", serialized);
}
// Writes the "include datacenter GPUs" flag to localStorage as "true"/"false".
function persistDC() {
  const serialized = String(S.includeDC);
  localStorage.setItem("include_dc", serialized);
}
// Returns a new array of the GPUs to show: all of them when S.includeDC is
// set, otherwise every GPU whose tier is not "datacenter".
function visibleGpus() {
  const shown = [];
  for (const gpu of GPUS) {
    if (!S.includeDC && gpu.tier === "datacenter") continue;
    shown.push(gpu);
  }
  return shown;
}
// Serialises the per-GPU price overrides to JSON and stores them under
// "gpu_price_overrides".
function persistOverrides() {
  const payload = JSON.stringify(S.priceOverrides);
  localStorage.setItem("gpu_price_overrides", payload);
}
// Shorthand for looking up a DOM element by id.
const $ = (id) => {
  return document.getElementById(id);
};

// KV-cache quant id → size factor; kvGB() divides these by 2.0, i.e. FP16 is the baseline.
const KV_BYTES = {
  FP16: 2.0,
  Q8: 1.0,
  Q4: 0.5,
};

// Fixed allowance added to every VRAM estimate:
// CUDA context, activations, framework buffers.
const FRAMEWORK_OVERHEAD_GB = 1.0;
| // ============================ math | |
// Returns the QUANTS entry selected in state, or the "Q4_K_M" entry when the
// selected id is not present in the table.
function currentQuant() {
  const selected = QUANTS.find((quant) => quant.id === S.quant);
  if (selected) return selected;
  return QUANTS.find((quant) => quant.id === "Q4_K_M");
}
// Returns the engine entry whose name matches S.engine, or the first engine
// in the table when there is no match.
function currentEngine() {
  const engines = ENGINES.engines;
  const selected = engines.find((engine) => engine.name === S.engine);
  if (selected) return selected;
  return engines[0];
}
// Returns the current engine's multiplier: its single_stream figure when
// S.conc is "single", otherwise its batched figure.
function engineMult() {
  const engine = currentEngine();
  if (S.conc === "single") {
    return engine.single_stream;
  }
  return engine.batched;
}
// Numeric readers for the shared sliders; each parses the element's current value as a float.
function spec() {
  const raw = $("spec").value;
  return Number.parseFloat(raw);
}
function ioRatio() {
  const raw = $("io").value;
  return Number.parseFloat(raw);
}
function years() {
  const raw = $("years").value;
  return Number.parseFloat(raw);
}
function util() {
  const raw = $("util").value;
  return Number.parseFloat(raw);
}
function kwh() {
  const raw = $("kwh").value;
  return Number.parseFloat(raw);
}
// Minimum tokens/sec target from the "mintps" control (0 means the filter is off).
// Always parsed as base 10 so a "0x…" value can never be read as hexadecimal.
function minTps() {
  return Number.parseInt($("mintps").value, 10);
}
// Tokens per day: the active tab's slider stores a base-10 exponent.
function tpd() {
  const slider = $(activeTpdInput());
  const exponent = parseFloat(slider.value);
  return Math.pow(10, exponent);
}
// Id of the tokens/day slider belonging to the current tab.
function activeTpdInput() {
  if (S.tab === "model") return "tpd";
  if (S.tab === "gpu") return "tpd2";
  return "tpd3";
}
// Context length in tokens from the tab's slider (which stores a base-2 exponent),
// clamped to model.context when the model declares one. Returns 8192 if the slider is absent.
function ctxTokens(model) {
  const slider = $(S.tab === "gpu" ? "ctx2" : "ctx");
  if (!slider) return 8192;
  const requested = Math.pow(2, parseFloat(slider.value));
  const limit = model && model.context;
  const tokens = limit ? Math.min(requested, limit) : requested;
  return Math.round(tokens);
}
// Weight size: total params (billions) × the current quant's bpp; 0 when the param count is unknown.
function weightsGB(model) {
  const totalParams = model.params_total_b;
  return totalParams ? totalParams * currentQuant().bpp : 0;
}
// KV-cache size at `ctx` tokens: the model's per-1k figure scaled by context and by
// KV_BYTES[quant] / 2. Uses S.kvQuant when no quant is passed; 0 when the model has no figure.
function kvGB(model, ctx, kvQuant) {
  const perThousand = model.kv_gb_per_1k;
  if (!perThousand) return 0;
  const quantName = kvQuant || S.kvQuant;
  const unscaled = perThousand * (ctx / 1000);
  const scale = KV_BYTES[quantName] / 2.0;
  return unscaled * scale;
}
// Total VRAM estimate: weights + KV cache + fixed framework overhead.
// `ctx` defaults to the slider-derived context when null/undefined.
function vramNeed(model, ctx, kvQuant) {
  const weights = weightsGB(model);
  const tokens = ctx ?? ctxTokens(model);
  const cache = kvGB(model, tokens, kvQuant);
  return weights + cache + FRAMEWORK_OVERHEAD_GB;
}
// Engine ↔ quant compatibility.
// Unknown quant ids are treated as supported; a quant listing "all" runs on every engine.
function engineSupportsQuant(engineName, quantId) {
  const entry = QUANTS.find((candidate) => candidate.id === quantId);
  if (!entry) return true;
  return entry.engines.some((name) => name === "all" || name === engineName);
}
// GPU ↔ quant compatibility: a quant flagged needs_fp8 / needs_fp4 requires the matching
// gpu.fp8 / gpu.fp4 capability. Unknown quant ids are treated as supported.
function gpuSupportsQuant(gpu, quantId) {
  const entry = QUANTS.find((candidate) => candidate.id === quantId);
  if (!entry) return true;
  const missingFp8 = entry.needs_fp8 && !gpu.fp8;
  const missingFp4 = entry.needs_fp4 && !gpu.fp4;
  return !(missingFp8 || missingFp4);
}
// Decide whether `model` can run on `gpu` under the current settings.
// Failure → {ok:false, reason, detail} with reason one of
//   "quant-gpu" | "quant-engine" | "weights" | "kv" | "tps" (the "tps" failure also carries tps).
// Success → {ok:true, tps, vram_used, kv_used, downgraded}.
// The KV quant falls back from the user's choice → Q8 → Q4 until the total fits in VRAM.
function fitCheck(model, gpu, opts = {}) {
  const ctx = opts.ctx ?? ctxTokens(model);
  const reject = (reason, detail, extra = {}) => ({ok: false, reason, ...extra, detail});

  if (!gpuSupportsQuant(gpu, S.quant)) {
    return reject("quant-gpu", `${gpu.name} lacks ${S.quant} support`);
  }
  if (!engineSupportsQuant(S.engine, S.quant)) {
    return reject("quant-engine", `${S.engine} doesn't support ${S.quant}`);
  }

  const w = weightsGB(model);
  if (w + FRAMEWORK_OVERHEAD_GB > gpu.vram) {
    return reject("weights", `weights ${w.toFixed(1)}GB + overhead > ${gpu.vram}GB`);
  }

  // Fallback ladder without duplicates; stop at the first KV quant that fits.
  const ladder = [...new Set([S.kvQuant, "Q8", "Q4"])];
  let picked = null;
  ladder.some((kvq) => {
    const kv = kvGB(model, ctx, kvq);
    const fitsInVram = w + kv + FRAMEWORK_OVERHEAD_GB <= gpu.vram;
    if (fitsInVram) picked = {quant: kvq, gb: kv};
    return fitsInVram;
  });

  if (picked === null) {
    // Not even Q4 KV fits: report the Q4 size and the largest context that would.
    const minKv = kvGB(model, ctx, "Q4");
    const ctxFitTokens = Math.max(0, (gpu.vram - w - FRAMEWORK_OVERHEAD_GB) * 1000 / (model.kv_gb_per_1k * KV_BYTES["Q4"] / 2));
    return reject("kv", `even with Q4 KV (${minKv.toFixed(1)}GB) at ${(ctx/1024).toFixed(0)}K context > ${gpu.vram}GB · max ≈ ${(ctxFitTokens/1024).toFixed(0)}K`);
  }

  const tps = tpsFor(model, gpu);
  const floor = minTps();
  if (floor > 0 && tps < floor) {
    return reject("tps", `${tps.toFixed(0)} t/s < target ${floor} t/s`, {tps});
  }

  return {
    ok: true,
    tps,
    vram_used: w + picked.gb + FRAMEWORK_OVERHEAD_GB,
    kv_used: picked.quant,
    downgraded: picked.quant !== S.kvQuant,
  };
}
// Boolean shortcut for fitCheck(): true when the model runs on the GPU under current settings.
function fits(model, gpu) {
  const { ok } = fitCheck(model, gpu);
  return ok;
}
// Estimated decode tokens/sec: ANCHOR_EFF × bandwidth ÷ (active params × bpp),
// then scaled by the engine multiplier and the speculative-decoding factor.
// Returns 0 when the model has no active-parameter count.
function tpsFor(model, gpu) {
  const activeParams = model.params_active_b;
  if (!activeParams) return 0;
  const activeWeights = activeParams * currentQuant().bpp;
  const base = ANCHOR_EFF * gpu.bandwidth / activeWeights;
  const withEngine = base * engineMult();
  return withEngine * spec();
}
// Effective GPU price: a positive numeric user override wins; otherwise the price
// field selected by S.price, falling back to the street price.
function gpuPrice(gpu) {
  const override = S.priceOverrides[gpu.name];
  const hasOverride = typeof override === "number" && override > 0;
  return hasOverride ? override : (gpu[S.price] || gpu.street);
}
// Label for the price source: "yours" when a positive numeric override applies, else S.price.
function priceLabel(gpu) {
  const override = S.priceOverrides[gpu.name];
  if (typeof override !== "number" || !(override > 0)) return S.price;
  return "yours";
}
// API cost per OUTPUT token, folding in the input tokens that accompany it:
//   total = outTok × outC + (outTok × ioRatio) × inC = outTok × (outC + ioRatio × inC)
// Returns null when the model has no cheapest-provider quote.
function apiPerToken(model) {
  const quote = model.cheapest;
  if (!quote) return null;
  const inputsPerOutput = ioRatio();
  const perInputToken = quote.prompt_per_mtok / 1e6;
  const perOutputToken = quote.completion_per_mtok / 1e6;
  return perOutputToken + inputsPerOutput * perInputToken;
}
// Daily API spend at the current tokens/day; 0 when the model has no API price.
function dailyApiCost(model) {
  const perToken = apiPerToken(model);
  if (perToken === null) return 0;
  return perToken * tpd();
}
// Itemised daily API spend: output tokens (tpd), input tokens (tpd × I/O ratio), the cost
// of each at the cheapest provider's $/Mtok rates, and their sum. Null when there is no quote.
function dailyApiBreakdown(model) {
  const quote = model.cheapest;
  if (!quote) return null;
  const { prompt_per_mtok: in_per_mtok, completion_per_mtok: out_per_mtok } = quote;
  const ratio = ioRatio();
  const out = tpd();
  const inn = out * ratio;
  const in_cost = inn * (in_per_mtok / 1e6);
  const out_cost = out * (out_per_mtok / 1e6);
  return {
    out,
    inn,
    in_cost,
    out_cost,
    total: in_cost + out_cost,
    in_per_mtok,
    out_per_mtok,
  };
}
// Daily electricity cost: TDP in kW × utilisation × price per kWh × 24 hours.
function dailyPower(gpu) {
  const kilowatts = gpu.tdp / 1000;
  return kilowatts * util() * kwh() * 24;
}
// Daily straight-line amortisation of the GPU price over the chosen number of years.
function dailyAmort(gpu) {
  const lifetimeDays = years() * 365;
  return gpuPrice(gpu) / lifetimeDays;
}
// Days until net daily savings (API spend avoided − electricity) add up to the GPU's price.
// Amortisation is deliberately left out: the question is when savings equal the sticker price.
// Only tokens the GPU can generate in a day (tps × 86400) count as avoided API spend.
// Returns Infinity when there is no API price or electricity eats all the savings.
function paybackDays(model, gpu) {
  const perToken = apiPerToken(model);
  if (!(perToken > 0)) return Infinity;
  const dailyCapacity = tpsFor(model, gpu) * 86400;
  const servedTokens = Math.min(tpd(), dailyCapacity);
  const netPerDay = (perToken * servedTokens) - dailyPower(gpu);
  if (netPerDay <= 0) return Infinity;
  return gpuPrice(gpu) / netPerDay;
}
// Daily net saving on a full-TCO basis: API spend avoided (capped by what the GPU can
// generate per day) minus amortisation and electricity. Negative means the GPU costs more.
function dailyTcoSavings(model, gpu) {
  const dailyCapacity = tpsFor(model, gpu) * 86400;
  const servedTokens = Math.min(tpd(), dailyCapacity);
  const apiAvoided = (apiPerToken(model) || 0) * servedTokens;
  const ownershipCost = dailyAmort(gpu) + dailyPower(gpu);
  return apiAvoided - ownershipCost;
}
// Compact token count: one decimal with a K/M/B/T suffix, whole number below 1000,
// "—" for negative or non-finite input.
function fmtTok(n) {
  if (!isFinite(n) || n < 0) return "—";
  const scales = [[1e12, "T"], [1e9, "B"], [1e6, "M"], [1e3, "K"]];
  for (const [divisor, suffix] of scales) {
    if (n >= divisor) return (n / divisor).toFixed(1) + suffix;
  }
  return n.toFixed(0);
}
/**
 * Format a dollar amount with precision that adapts to its magnitude.
 *   non-finite → "$∞" · zero → "$0.00" · ≥ 10 → whole dollars (locale grouping)
 *   ≥ 1 → 2 decimals · ≥ 0.001 → 4 decimals · smaller → exponential
 * Negative amounts get a leading "-".
 * @param {number} n
 * @returns {string}
 */
function fmtMoney(n) {
  if (!isFinite(n)) return "$∞";
  // Zero would otherwise fall through to the exponential branch and print "$0.0e+0".
  if (n === 0) return "$0.00";
  const sign = n < 0 ? "-" : "";
  n = Math.abs(n);
  if (n >= 1000) return sign + "$" + n.toLocaleString(undefined, {maximumFractionDigits: 0});
  if (n >= 10) return sign + "$" + Math.round(n).toLocaleString();
  if (n >= 1) return sign + "$" + n.toFixed(2);
  if (n >= 0.001) return sign + "$" + n.toFixed(4);
  return sign + "$" + n.toExponential(1);
}
/**
 * Format a duration given in days, choosing the unit by magnitude:
 * hours below 1 day, days below 60, months (30-day) below 730, years (365-day) beyond.
 * Negative or non-finite input yields "never".
 * @param {number} n duration in days
 * @returns {string}
 */
function fmtDays(n) {
  if (!isFinite(n) || n < 0) return "never";
  if (n < 1) return (n*24).toFixed(1) + " hours";
  if (n < 60) {
    const days = Math.round(n);
    // Values in [1, 1.5) round to 1 and need the singular unit.
    return days + (days === 1 ? " day" : " days");
  }
  if (n < 730) return (n/30).toFixed(1) + " months";
  return (n/365).toFixed(1) + " years";
}
| // ============================ tabs | |
// Activate a tab: record it in S.tab, flag the matching tab button, show only the
// matching pane, then refresh that tab's slider label (if it has one) and re-render it.
function setTab(name) {
  S.tab = name;
  for (const button of document.querySelectorAll(".tab-btn")) {
    button.dataset.active = button.dataset.tab === name;
  }
  for (const pane of document.querySelectorAll("[data-pane]")) {
    pane.classList.toggle("hidden", pane.dataset.pane !== name);
  }
  switch (name) {
    case "browse":
      syncSlider("tpd3", "tpd-val3");
      renderBrowse();
      break;
    case "model":
      syncSlider("tpd", "tpd-val");
      renderModelTab();
      break;
    case "gpu":
      syncSlider("tpd2", "tpd-val2");
      renderGpuTab();
      break;
    case "usage":
      renderUsageTab();
      break;
    default:
      break;
  }
}
// Show a log10 slider's value (10^value) as a compact token count in its label element.
function syncSlider(id, valId) {
  const exponent = parseFloat($(id).value);
  const label = $(valId);
  label.textContent = fmtTok(Math.pow(10, exponent));
}
// Context length label: values from 1024 up are shown in K (one decimal below 10K,
// none from 10K); smaller values are returned unchanged.
function fmtCtx(t) {
  if (!(t >= 1024)) return t;
  const digits = t >= 10240 ? 0 : 1;
  return (t / 1024).toFixed(digits) + "K";
}
// Refresh every read-out next to the controls: tokens/day sliders, VRAM, years,
// utilisation, kWh price, I/O ratio, spec factor, min TPS, both context sliders,
// the engine/quant help lines and (when present) the settings strip on the model tab.
function renderAllValueLabels() {
  const tokenSliders = [["tpd", "tpd-val"], ["tpd2", "tpd-val2"], ["tpd3", "tpd-val3"]];
  for (const [sliderId, labelId] of tokenSliders) syncSlider(sliderId, labelId);

  $("vram-val").textContent = `${$("vram").value} GB`;
  $("years-val").textContent = `${years()}y`;
  $("util-val").textContent = `${(util() * 100).toFixed(0)}%`;
  $("kwh-val").textContent = `$${kwh().toFixed(3)}`;
  $("io-val").textContent = `${ioRatio()}:1`;
  $("spec-val").textContent = `${spec().toFixed(2)}x`;
  const floor = minTps();
  $("mintps-val").textContent = floor === 0 ? "off" : `${floor} t/s`;

  // Context sliders store a base-2 exponent.
  const sliderTokens = (id) => Math.round(Math.pow(2, parseFloat($(id).value)));
  const ctxMain = sliderTokens("ctx");
  const ctxGpu = sliderTokens("ctx2");
  $("ctx-val").textContent = fmtCtx(ctxMain);
  $("ctx-val2").textContent = fmtCtx(ctxGpu);

  const engine = currentEngine();
  const isSingle = S.conc === "single";
  const multiplier = isSingle ? engine.single_stream : engine.batched;
  $("engine-help").textContent = `${engine.use_case} · ${multiplier}× vs llama.cpp`;

  const quant = currentQuant();
  $("quant-help").textContent = `${quant.format} · ${quant.bpp.toFixed(2)} B/param · ${quant.quality} · runs on: ${quant.engines.join(", ")}`;

  // Settings strip on model tab
  const strip = $("settings-strip");
  if (strip) {
    strip.textContent = `${engine.name} · ${quant.label} weights · ${S.kvQuant} KV · ≥${minTps()} t/s · ${S.price} prices · ${isSingle ? "single-stream" : "batched"}`;
  }
}
| // ============================ MODEL TAB | |
// Wire up the model search combobox: result rendering, selection, open/close handling,
// and the initial default selection.
function setupModelCombobox() {
  const input = $("model-search");
  const list = $("model-results");
  const open = () => list.classList.remove("hidden");
  const close = () => list.classList.add("hidden");

  // One formatter for the size / meta text so the result rows, the click handler and the
  // default selection cannot drift apart (the default used to omit the active-param suffix).
  const sizeLabel = (r) => {
    const hasDistinctActive = r.params_active_b && r.params_active_b !== r.params_total_b;
    return `${r.params_total_b}B${hasDistinctActive ? " (A"+r.params_active_b+"B)" : ""}`;
  };
  const metaLabel = (r) => `${sizeLabel(r)} · ${r.open_weight ? "open" : "closed"}`;

  // Rebuild the dropdown for query `q`: priced models with a known size, matched on
  // name or id, open-weight first, then smallest first, at most 30 rows.
  function renderResults(q) {
    q = q.toLowerCase().trim();
    const candidates = DATA
      .filter(r => r.cheapest && r.params_total_b)
      .filter(r => !q || (r.name||"").toLowerCase().includes(q) || r.id.toLowerCase().includes(q))
      .sort((a, b) => {
        // Prefer open-weight, then smaller params (fits more GPUs)
        if (a.open_weight !== b.open_weight) return b.open_weight - a.open_weight;
        return a.params_total_b - b.params_total_b;
      })
      .slice(0, 30);
    list.innerHTML = candidates.map(r => `
      <button data-mid="${r.id}" class="w-full text-left px-4 py-2 hover:bg-white/5 flex items-center justify-between gap-3">
        <div class="min-w-0">
          <div class="text-sm truncate">${r.name}</div>
          <div class="text-[11px] text-zinc-500 mono truncate">${r.id} · ${sizeLabel(r)}</div>
        </div>
        <div class="text-right text-[11px] mono shrink-0">
          <div>$${(r.cheapest.prompt_per_mtok).toFixed(2)} / $${(r.cheapest.completion_per_mtok).toFixed(2)}</div>
          <div class="text-zinc-500">${r.open_weight ? "open" : "closed"} · ${r.providers.length}p</div>
        </div>
      </button>
    `).join("") || `<div class="px-4 py-3 text-sm text-zinc-500">no matches</div>`;
  }

  // Bind ONCE via delegation — survives renderResults() rebuilds.
  list.addEventListener("mousedown", e => {
    const b = e.target.closest("button[data-mid]");
    if (!b) return;
    e.preventDefault(); // beat input blur
    const r = DATA.find(x => x.id === b.dataset.mid);
    // A row whose id no longer resolves is ignored instead of throwing on r.name.
    if (!r) return;
    S.modelId = r.id;
    input.value = r.name;
    $("model-meta").textContent = metaLabel(r);
    close();
    renderModelTab();
  });
  input.addEventListener("focus", () => { renderResults(input.value); open(); });
  input.addEventListener("input", () => { renderResults(input.value); open(); });
  document.addEventListener("mousedown", e => { if (!e.target.closest('[data-target="model-combo"]')) close(); });

  // Default selection: Qwen3.5 9B if found, else first open model
  const defaultPick = DATA.find(r => r.id === "qwen/qwen3.5-9b") || DATA.find(r => r.open_weight && r.cheapest && r.params_total_b);
  if (defaultPick) {
    S.modelId = defaultPick.id;
    input.value = defaultPick.name;
    $("model-meta").textContent = metaLabel(defaultPick);
  }
}
// Economic verdict for a GPU that already FITS the model, based on payback time only:
// ≤ 1 year → BUY, ≤ 3 years → MAYBE, longer → SLOW, none (non-finite or ≤ 0) → NO PAYBACK.
function classifyVerdict(payback_days) {
  const hasPayback = isFinite(payback_days) && payback_days > 0;
  if (!hasPayback) return {tier: "info", label: "NO PAYBACK"};
  const bands = [
    {limit: 365, tier: "buy", label: "BUY"},
    {limit: 365 * 3, tier: "maybe", label: "MAYBE"},
  ];
  for (const {limit, tier, label} of bands) {
    if (payback_days <= limit) return {tier, label};
  }
  return {tier: "info", label: "SLOW"};
}
// Badge text and colour classes for a fitCheck() failure reason; unknown reasons get
// a generic "won't fit" badge.
function failBadge(reason) {
  const rose = "bg-rose-500/15 text-rose-300";
  const amber = "bg-amber-500/15 text-amber-300";
  const badges = {
    "weights": {label: "weights too big", color: rose},
    "kv": {label: "context too big", color: amber},
    "quant-gpu": {label: "quant unsupported", color: rose},
    "quant-engine": {label: "engine mismatch", color: rose},
    "tps": {label: "below TPS target", color: amber},
  };
  const known = badges[reason];
  if (known) return known;
  return {label: "won't fit", color: rose};
}
// Human-readable payback text; a non-finite payback means the GPU never pays for itself.
function paybackPhrase(pb) {
  if (!isFinite(pb)) return `doesn't pay back at this usage`;
  return `${fmtDays(pb)} payback`;
}
// "Mainstream" filter: at least two providers, no ":" variant suffix in the id, and
// none of the niche keywords in the id.
function isMainstream(r) {
  if (r.providers.length < 2) return false;
  if (r.id.includes(":")) return false;
  const nicheKeywords = /solidity|roleplay|erp|nsfw|uncensored/i;
  return !nicheKeywords.test(r.id);
}
// Pill colour classes for a verdict tier; unknown tiers use the neutral "info" style.
function pillClass(tier) {
  const neutral = "bg-zinc-700/40 text-zinc-300";
  const byTier = {
    buy: "bg-emerald-500/15 text-emerald-300",
    maybe: "bg-amber-500/15 text-amber-300",
    info: neutral,
    skip: "bg-rose-500/15 text-rose-300",
  };
  return byTier[tier] || neutral;
}
// Legacy trade-off cards (the name marks it as retired). Builds up to two cards relative
// to `best` — a strictly cheaper fitting GPU and a higher-VRAM/price "headroom" GPU —
// followed by an "Already own a GPU?" button. Returns an HTML string. `m` is unused.
const _ignored_old_renderTradeoffs = function (best, allFitting, m) {
  const overBudgetTag = (price) =>
    price > S.budget ? ` · <span class="text-amber-300">over budget</span>` : "";
  const card = (title, subtitle, accent, body) => `
<div class="rounded-2xl bg-zinc-900/30 grad-border glow p-4 space-y-2">
<div class="text-[10px] uppercase tracking-[.18em] ${accent}">${title}</div>
<div class="text-sm font-medium">${subtitle}</div>
<div class="text-[12px] text-zinc-400 leading-relaxed">${body}</div>
</div>`;

  // Cheaper: the lowest-priced fitting GPU that costs strictly less than best.
  // Headroom: next tier up by VRAM (or by price when VRAM is equal).
  let strictlyCheaper = null;
  let headroom;
  if (best) {
    const cheaperOnes = allFitting.filter(s => s.price < best.price);
    cheaperOnes.sort((a, b) => a.price - b.price);
    strictlyCheaper = cheaperOnes[0];
    headroom = allFitting.find(s => s.g.vram > best.g.vram) || allFitting.find(s => s.price > best.price);
  } else {
    headroom = allFitting[1];
  }

  const parts = [];
  if (strictlyCheaper) {
    const savings = best.price - strictlyCheaper.price;
    parts.push(card(
      "Cheaper alternative",
      `${strictlyCheaper.g.name} · ${fmtMoney(strictlyCheaper.price)}`,
      "text-emerald-300/80",
      `Saves ${fmtMoney(savings)} upfront · ${paybackPhrase(strictlyCheaper.pb)} · ${strictlyCheaper.tps.toFixed(0)} tok/s · ${strictlyCheaper.g.vram}GB${overBudgetTag(strictlyCheaper.price)}`
    ));
  }

  const headroomIsDistinct = headroom && headroom !== strictlyCheaper && (!best || headroom.g !== best.g);
  if (headroomIsDistinct) {
    const extraVram = best ? headroom.g.vram - best.g.vram : 0;
    const extraTps = best ? headroom.tps - best.tps : 0;
    const extraCost = best ? headroom.price - best.price : 0;
    const vramPart = extraVram > 0 ? `+${extraVram}GB VRAM · ` : "";
    const tpsPart = extraTps > 0 ? `+${extraTps.toFixed(0)} tok/s · ` : "";
    const costPart = extraCost > 0 ? ` · ${fmtMoney(extraCost)} more` : "";
    parts.push(card(
      "More headroom · future-proof",
      `${headroom.g.name} · ${fmtMoney(headroom.price)}`,
      "text-indigo-300/80",
      `${vramPart}${tpsPart}${paybackPhrase(headroom.pb)}${costPart}${overBudgetTag(headroom.price)}`
    ));
  }

  parts.push(`
<button id="already-own-btn" class="text-left rounded-2xl bg-zinc-900/30 grad-border glow p-4 space-y-2 hover:bg-zinc-900/60 transition">
<div class="text-[10px] uppercase tracking-[.18em] text-zinc-400">Already own a GPU?</div>
<div class="text-sm font-medium">Pick yours →</div>
<div class="text-[12px] text-zinc-500">See payback at any price you paid.</div>
</button>`);
  return parts.join("");
}
// Render the "model" tab for S.modelId: model meta line, verdict strip, show-the-math
// panel, ranked GPU table, hint line and the API provider table.
// Does nothing when no model is selected or the id is not in DATA.
function renderModelTab() {
  if (!S.modelId) return;
  const m = DATA.find(r => r.id === S.modelId);
  if (!m) return;
  // "custom" markers follow priceLabel(), so they appear only for overrides gpuPrice() actually uses.
  const hasCustomPrice = (g) => priceLabel(g) === "yours";
  // Cap context slider to model's max
  const ctxEl = $("ctx");
  const wantedCtx = ctxEl.value === "max" ? m.context : Math.pow(2, parseFloat(ctxEl.value));
  const overMax = m.context && wantedCtx > m.context;
  $("ctx-warn").classList.toggle("hidden", !overMax);
  const ctx = ctxTokens(m);
  const meta = `${m.params_total_b}B${m.params_active_b !== m.params_total_b ? " (A"+m.params_active_b+"B)" : ""} · ${m.open_weight ? "open" : "closed"} · max ${(m.context/1024).toFixed(0)}K · weights ${weightsGB(m).toFixed(1)}GB · KV@${(ctx/1024).toFixed(0)}K ${kvGB(m, ctx).toFixed(1)}GB`;
  $("model-meta").textContent = meta;
  // Score every visible GPU
  const scored = visibleGpus().map(g => {
    const f = fitCheck(m, g, {ctx});
    const tps = tpsFor(m, g);
    const pb = f.ok ? paybackDays(m, g) : Infinity;
    const dailySave = f.ok ? dailyTcoSavings(m, g) : 0;
    return { g, f, tps, pb, dailySave, price: gpuPrice(g), netyr: dailySave * 365 };
  });
  const fitGpus = scored.filter(s => s.f.ok);
  const failGpus = scored.filter(s => !s.f.ok);
  const inBudget = fitGpus.filter(s => s.price <= S.budget);
  if ($("budget-fits-count")) $("budget-fits-count").textContent = inBudget.length;
  $("gpu-count").textContent = `${fitGpus.length} fit · ${failGpus.length} won't · ${inBudget.length} in budget`;
  // Hide the "show fails" toggle when no failures
  if ($("show-fails-label")) $("show-fails-label").style.display = failGpus.length === 0 ? "none" : "";
  if ($("show-fails-text")) $("show-fails-text").textContent = `show ${failGpus.length} GPU${failGpus.length === 1 ? "" : "s"} that don't fit`;
  // ─────────── Verdict strip (single row) ───────────
  const cheapestFitting = fitGpus.slice().sort((a,b) => a.price - b.price)[0];
  const cheapestInBudget = inBudget.slice().sort((a,b) => a.price - b.price)[0];
  const verdictPick = cheapestInBudget || cheapestFitting;
  if (!verdictPick) {
    $("verdict-model").className = "rounded-xl border-2 grad-border glow px-5 py-4 verdict-skip flex items-center justify-between gap-4 flex-wrap";
    const reasons = [...new Set(failGpus.map(s => s.f.reason))];
    // fitCheck() has already fallen back to Q4 KV, so a "kv" failure can only be fixed by
    // a shorter context; suggesting a Q8 KV cache would be misleading.
    const hint = reasons.includes("kv") ? "Shorten the context — even a Q4 KV cache doesn't fit." :
      reasons.includes("weights") ? "Try a smaller quant (Q4_K_S / Q3_K_M)." :
      reasons.includes("quant-engine") ? `${S.engine} doesn't support ${S.quant}.` :
      reasons.includes("quant-gpu") ? `None of these GPUs support ${S.quant}.` :
      reasons.includes("tps") ? "Lower the min-TPS target." :
      "No path to fit on consumer hardware.";
    $("verdict-model").innerHTML = `
      <div class="space-y-1">
        <div class="text-[10px] uppercase tracking-[.18em] text-rose-300/80">Won't fit</div>
        <div class="text-xl font-semibold">Can't run ${m.name} locally with current settings</div>
      </div>
      <div class="text-[12px] text-zinc-400">${hint}</div>`;
  } else {
    const v = classifyVerdict(verdictPick.pb);
    const overBudgetFlag = !cheapestInBudget;
    const overage = overBudgetFlag ? verdictPick.price - S.budget : 0;
    const headroom = !overBudgetFlag ? S.budget - verdictPick.price : 0;
    const tier = overBudgetFlag ? 'maybe' : v.tier;
    const accentColor = {buy:'text-emerald-300/80', maybe:'text-amber-300/80', info:'text-zinc-400', skip:'text-rose-300/80'}[tier];
    const headline = overBudgetFlag ? "Over budget · cheapest that fits"
      : v.tier === "buy" ? "Cheapest GPU that pays back fast"
      : v.tier === "maybe" ? "Cheapest GPU · pays back over years"
      : "Cheapest GPU that fits · won't pay back at this usage";
    const bd = dailyApiBreakdown(m);
    // Rounded (not truncated) so 0.29 × 100 = 28.999… shows as 29%, matching the other read-outs.
    const utilPct = (util()*100).toFixed(0);
    $("verdict-model").className = `rounded-xl border-2 grad-border glow px-5 py-4 verdict-${tier} space-y-2`;
    $("verdict-model").innerHTML = `
      <div class="flex items-center justify-between gap-4 flex-wrap">
        <div class="flex items-baseline gap-3 flex-wrap">
          <div class="text-[10px] uppercase tracking-[.18em] ${accentColor}">${headline}</div>
          <div class="text-2xl font-semibold tracking-tight">${verdictPick.g.name}</div>
          <div class="text-[12px] text-zinc-400 mono">
            <span data-edit-gpu-price="${verdictPick.g.name}" class="cursor-pointer underline decoration-dotted decoration-zinc-600 underline-offset-4 hover:text-indigo-300">${fmtMoney(verdictPick.price)}</span>
            ${hasCustomPrice(verdictPick.g) ? ` <span class="pill bg-indigo-500/20 text-indigo-200">custom</span>` : ""}
            · ${verdictPick.tps.toFixed(0)} tok/s
            · ${verdictPick.f.vram_used.toFixed(1)}/${verdictPick.g.vram} GB
            ${overBudgetFlag ? ` · <span class="text-amber-300">${fmtMoney(overage)} over budget</span>` : headroom > 0 ? ` · <span class="text-zinc-500">${fmtMoney(headroom)} under $${S.budget.toLocaleString()} budget</span>` : ""}
          </div>
        </div>
        <div class="text-right text-[12px] mono">
          <div class="${verdictPick.netyr > 0 ? 'text-emerald-300' : 'text-rose-300'} font-medium">${verdictPick.netyr > 0 ? "saves" : "loses"} ${fmtMoney(Math.abs(verdictPick.netyr))}/yr</div>
          <div class="text-zinc-500">${paybackPhrase(verdictPick.pb)}</div>
        </div>
      </div>
      ${bd ? `<div class="pt-2 border-t border-white/5 grid grid-cols-2 md:grid-cols-4 gap-2 text-[11px] mono">
        <div><span class="text-zinc-500">API price</span> $${bd.in_per_mtok.toFixed(2)} in / $${bd.out_per_mtok.toFixed(2)} out per Mtok</div>
        <div><span class="text-zinc-500">API spend / day</span> <span class="text-rose-300">${fmtMoney(bd.total)}</span></div>
        <div><span class="text-zinc-500">GPU power / day</span> ${fmtMoney(dailyPower(verdictPick.g))} <span class="text-zinc-600">(${verdictPick.g.tdp}W × ${utilPct}% × $${kwh().toFixed(3)}/kWh)</span></div>
        <div><span class="text-zinc-500">GPU amort / day</span> ${fmtMoney(verdictPick.price/(years()*365))} <span class="text-zinc-600">(over ${years()}y)</span></div>
      </div>` : ""}`;
    // Show-the-math panel
    if (bd) {
      const dailyP = dailyPower(verdictPick.g);
      const dailyA = verdictPick.price / (years() * 365);
      const totalDaily = dailyP + dailyA;
      // paybackDays()/dailyTcoSavings() only credit tokens the GPU can generate per day, so the
      // panel uses the same capped figure; otherwise its arithmetic would not add up to the
      // payback and net/yr shown above whenever demand exceeds throughput.
      const capTokens = verdictPick.tps * 86400;
      const capped = capTokens < bd.out;
      const apiAvoided = capped ? (apiPerToken(m) || 0) * capTokens : bd.total;
      const netDaily = apiAvoided - dailyP;
      const netDailyTco = apiAvoided - totalDaily;
      const spendLabel = capped ? "API spend avoided" : "API spend";
      $("math-body").textContent = [
        `Workload`,
        ` ${fmtTok(bd.out)} output tokens/day`,
        ` ${fmtTok(bd.inn)} input tokens/day (${ioRatio()}× ratio)`,
        ``,
        `API spend / day`,
        ` input : ${fmtTok(bd.inn)} × $${bd.in_per_mtok.toFixed(2)}/Mtok = ${fmtMoney(bd.in_cost)}`,
        ` output : ${fmtTok(bd.out)} × $${bd.out_per_mtok.toFixed(2)}/Mtok = ${fmtMoney(bd.out_cost)}`,
        ` total : ${fmtMoney(bd.total)}/day`,
        ``,
        `Local hardware (${verdictPick.g.name})`,
        ` GPU price : ${fmtMoney(verdictPick.price)} (${priceLabel(verdictPick.g)})`,
        ` TDP × util × kWh: ${verdictPick.g.tdp}W × ${utilPct}% × 24h × $${kwh().toFixed(3)} = ${fmtMoney(dailyP)}/day power`,
        ` Amortized : ${fmtMoney(verdictPick.price)} ÷ (${years()}y × 365d) = ${fmtMoney(dailyA)}/day`,
        ` Total daily TCO : ${fmtMoney(totalDaily)}/day`,
        ``,
        `Break-even (vs sticker price, electricity-aware)`,
        ...(capped ? [` Throughput cap: this GPU can serve ${fmtTok(capTokens)} of ${fmtTok(bd.out)} output tokens/day → API spend avoided = ${fmtMoney(apiAvoided)}/day`] : []),
        ` Net daily savings = ${spendLabel} − power = ${fmtMoney(apiAvoided)} − ${fmtMoney(dailyP)} = ${fmtMoney(netDaily)}`,
        ` Payback = ${fmtMoney(verdictPick.price)} ÷ ${fmtMoney(netDaily)}/day = ${paybackPhrase(verdictPick.pb)}`,
        ``,
        `Year-1 net (full TCO including amort)`,
        ` (${spendLabel} − power − amort) × 365 = ${fmtMoney(netDailyTco)}/day × 365 = ${fmtMoney(netDailyTco * 365)}/yr`,
        ``,
        `TPS estimate`,
        ` Anchor: RTX 5070 Ti (896 GB/s) @ Qwen3.5-9B Q4_K_M = 110 tok/s observed`,
        ` Calibration: ${(ANCHOR_EFF*100).toFixed(0)}% of theoretical bandwidth-bound peak`,
        ` This GPU: ${verdictPick.g.bandwidth} GB/s ÷ (${m.params_active_b}B × ${currentQuant().bpp.toFixed(2)} B/param) × ${engineMult()}× engine × ${spec().toFixed(2)}× spec = ${verdictPick.tps.toFixed(0)} tok/s`,
        ` Real-world: ±25%. Memory-bandwidth-bound single-stream decode model.`,
      ].join("\n");
    } else {
      $("math-body").textContent = "(no API price for this model — can't compute API cost.)";
    }
  }
  // ─────────── Ranked GPU table ───────────
  const sortKey = S.sortGpu || "price";
  const showFails = $("show-fails")?.checked;
  const sortFn = {
    price: (a,b) => a.price - b.price,
    tps: (a,b) => b.tps - a.tps,
    payback: (a,b) => a.pb - b.pb,
    netyr: (a,b) => b.netyr - a.netyr,
  }[sortKey] || ((a,b) => a.price - b.price);
  const visibleRows = [...fitGpus.sort(sortFn), ...(showFails ? failGpus.sort((a,b) => a.g.vram - b.g.vram) : [])];
  $("gpu-table-body").innerHTML = visibleRows.map(({g, f, tps, pb, price, netyr}) => {
    const isVerdict = verdictPick && g === verdictPick.g;
    const v2 = f.ok ? classifyVerdict(pb) : null;
    const overBudget = f.ok && price > S.budget;
    if (f.ok) {
      return `<tr class="row ${isVerdict ? 'bg-indigo-500/[.04]' : ''} cursor-pointer" data-gpu="${g.name}">
        <td class="px-4 py-2.5"><div class="flex items-center gap-2"><span class="text-sm">${g.name}</span><span class="text-[10px] text-zinc-500 mono">${g.tier}</span></div></td>
        <td class="px-4 py-2.5 text-right mono"><span data-edit-gpu-price="${g.name}" class="hover:text-indigo-300 underline decoration-dotted decoration-zinc-700 underline-offset-4 ${overBudget ? 'text-amber-300' : ''}" onclick="event.stopPropagation()">${fmtMoney(price)}</span>${hasCustomPrice(g) ? ' <span class="text-[10px] text-indigo-300/80">·custom</span>' : ''}</td>
        <td class="px-4 py-2.5 text-right mono text-zinc-400">${f.vram_used.toFixed(1)}/${g.vram}${f.downgraded ? ` <span class="pill bg-amber-500/15 text-amber-300/90 text-[9px]" title="Doesn't fit at ${S.kvQuant} KV; auto-fell-back to ${f.kv_used} KV cache to fit. Quality impact minimal.">${f.kv_used} KV</span>` : ''}</td>
        <td class="px-4 py-2.5 text-right mono">${tps.toFixed(0)}</td>
        <td class="px-4 py-2.5 text-right mono text-zinc-400">${isFinite(pb) ? fmtDays(pb) : "—"}</td>
        <td class="px-4 py-2.5 text-right mono ${netyr > 0 ? 'text-emerald-300' : 'text-rose-300'}">${fmtMoney(netyr)}</td>
        <td class="px-4 py-2.5 text-right"><span class="pill ${pillClass(overBudget ? "maybe" : v2.tier)}">${overBudget ? "OVER" : v2.label}</span></td>
      </tr>`;
    } else {
      const fb = failBadge(f.reason);
      return `<tr class="row opacity-50">
        <td class="px-4 py-2.5"><span class="text-sm line-through decoration-zinc-600">${g.name}</span> <span class="text-[10px] text-zinc-500 mono">${g.tier}</span></td>
        <td class="px-4 py-2.5 text-right mono text-zinc-500">${fmtMoney(price)}</td>
        <td colspan="4" class="px-4 py-2.5 text-[11px] text-zinc-500 italic">${f.detail}</td>
        <td class="px-4 py-2.5 text-right"><span class="pill ${fb.color}">${fb.label}</span></td>
      </tr>`;
    }
  }).join("");
  // Clicking a fitting row opens that GPU on the GPU tab; clicks on the editable price are ignored here.
  $("gpu-table-body").querySelectorAll("tr[data-gpu]").forEach(tr => tr.addEventListener("click", e => {
    if (e.target.closest("[data-edit-gpu-price]")) return;
    S.gpuName = tr.dataset.gpu;
    const sel = $("gpu-select"); if (sel) sel.value = S.gpuName;
    setTab("gpu");
  }));
  // Hint line
  let hint = "";
  const downgradedCount = fitGpus.filter(s => s.f.downgraded).length;
  if (downgradedCount > 0) {
    hint = `<span class="text-amber-300/80">${downgradedCount}</span> GPU${downgradedCount === 1 ? "" : "s"} fit by auto-falling-back to a smaller KV cache (annotated in VRAM column).`;
  } else if (failGpus.length && !showFails) {
    const reasons = [...new Set(failGpus.map(s => s.f.reason))];
    if (reasons.includes("weights")) hint = `${failGpus.length} GPU${failGpus.length===1?"":"s"} don't fit at ${currentQuant().label}. Try Q4_K_S — may bring smaller cards into range.`;
    else if (reasons.includes("kv")) hint = `Some GPUs blocked even at Q4 KV cache. Shorten the context to fit.`;
    else if (reasons.includes("tps")) hint = `Some GPUs below your ${minTps()} t/s target. Lower the bar to see them.`;
    else hint = `${failGpus.length} GPU${failGpus.length===1?"":"s"} don't fit. Toggle "show GPUs that don't fit" to see why.`;
  }
  $("hint-line").innerHTML = hint;
  renderApiTable(m);
}
// Render the API pricing table for model `m`: up to 8 provider rows followed by the
// FRONTIER reference rows, written into #api-table.
function renderApiTable(m) {
  // Format an optional metric; a missing value shows a bare "—" with no unit,
  // so an absent uptime no longer renders as "—%".
  const metric = (value, digits, unit = "") => {
    const text = value?.toFixed?.(digits);
    return text == null ? "—" : text + unit;
  };
  const rows = m.providers.slice(0, 8).map(p => `
    <tr class="row border-t border-white/5">
      <td class="px-4 py-2 text-sm">${p.provider}</td>
      <td class="px-4 py-2 text-xs text-zinc-500">${p.quantization && p.quantization !== "unknown" ? p.quantization : "—"}</td>
      <td class="px-4 py-2 text-right mono num">$${p.prompt_per_mtok.toFixed(2)}</td>
      <td class="px-4 py-2 text-right mono num">$${p.completion_per_mtok.toFixed(2)}</td>
      <td class="px-4 py-2 text-right mono text-zinc-500">${metric(p.throughput_tps, 0)}</td>
      <td class="px-4 py-2 text-right mono text-zinc-500">${metric(p.uptime_1d, 1, "%")}</td>
    </tr>`).join("");
  const front = FRONTIER.map(f => `
    <tr class="border-t border-white/5 bg-black/20">
      <td class="px-4 py-2 text-xs text-zinc-400">${f.label}</td>
      <td class="px-4 py-2 text-xs text-zinc-500 truncate max-w-[160px]">${f.model}</td>
      <td class="px-4 py-2 text-right mono num text-zinc-400">$${f.prompt_per_mtok.toFixed(2)}</td>
      <td class="px-4 py-2 text-right mono num text-zinc-400">$${f.completion_per_mtok.toFixed(2)}</td>
      <td class="px-4 py-2"></td><td class="px-4 py-2"></td>
    </tr>`).join("");
  $("api-table").innerHTML = `
    <table class="w-full text-sm">
      <thead><tr class="text-left">
        <th class="px-4 py-2">Provider</th><th class="px-4 py-2">Quant</th>
        <th class="px-4 py-2 text-right">$/Mtok in</th><th class="px-4 py-2 text-right">$/Mtok out</th>
        <th class="px-4 py-2 text-right">TPS</th><th class="px-4 py-2 text-right">Uptime 1d</th>
      </tr></thead>
      <tbody>${rows}${front}</tbody>
    </table>`;
}
| // ============================ GPU TAB | |
// Fill the GPU dropdown with the visible GPUs, select S.gpuName, and re-render the
// GPU tab whenever the selection changes.
function setupGpuSelect() {
  const sel = $("gpu-select");
  const options = [];
  for (const g of visibleGpus()) {
    options.push(`<option value="${g.name}">${g.name} · ${g.vram}GB · ${g.tier}</option>`);
  }
  sel.innerHTML = options.join("");
  sel.value = S.gpuName;
  sel.addEventListener("change", () => {
    S.gpuName = sel.value;
    renderGpuTab();
  });
}
/**
 * Render the GPU tab for the GPU named by S.gpuName (GPUS[0] when unknown):
 * the price headline, spec summary cards, the verdict banner, the table of
 * open-weight models that fit, and the list of larger GPUs.
 *
 * Reads filter/sort controls from the DOM when they exist; writes only to
 * DOM elements. Returns nothing.
 */
function renderGpuTab() {
  const g = GPUS.find(x => x.name === S.gpuName) || GPUS[0];

  // Sync headline price input, but leave it alone while the user is typing in it.
  const hp = $("gpu-price-headline");
  if (hp && document.activeElement !== hp) hp.value = gpuPrice(g);
  $("gpu-price-context").textContent = `MSRP ${fmtMoney(g.msrp)} · street ${fmtMoney(g.street)}${S.priceOverrides[g.name] ? " · using your override" : ""}`;

  const statCard = (label, value) =>
    `<div class="bg-zinc-950/40 rounded-lg p-3"><div class="text-[10px] uppercase tracking-wider text-zinc-500">${label}</div><div class="text-base mono">${value}</div></div>`;
  $("gpu-summary").innerHTML = [
    statCard("VRAM", `${g.vram} GB`),
    statCard("Bandwidth", `${g.bandwidth} GB/s`),
    statCard("TDP", `${g.tdp} W`),
    statCard("MSRP", fmtMoney(g.msrp)),
    statCard("Street", fmtMoney(g.street)),
  ].join("\n");

  // Best models for this GPU (open-weight, fits in VRAM with chosen context + KV).
  // Each candidate is checked against this GPU exactly once; the models that do
  // not fit are kept so the "bigger GPUs" section can reuse them.
  const ctx = ctxTokens({context: 1e9});
  const candidates = DATA.filter(r => r.params_total_b && r.cheapest && r.open_weight);
  const fitting = [];
  const notFitting = [];
  for (const r of candidates) {
    const f = fitCheck(r, g, {ctx});
    if (!f.ok) {
      notFitting.push(r);
      continue;
    }
    fitting.push({
      r,
      tps: tpsFor(r, g),
      pb: paybackDays(r, g),
      dailySave: dailyTcoSavings(r, g),
      vram_used: f.vram_used,
      kv_used: f.kv_used,
      downgraded: f.downgraded,
    });
  }
  fitting.sort((a, b) => b.dailySave - a.dailySave);

  // Verdict pool: mainstream only (helper defined module-scope below)
  const verdictPool = fitting.filter(x => isMainstream(x.r));
  if (fitting.length === 0) {
    $("verdict-gpu").className = "rounded-xl border-2 grad-border glow px-5 py-4 verdict-skip flex items-center gap-4 flex-wrap";
    $("verdict-gpu").innerHTML = `
      <div class="space-y-1"><div class="text-[10px] uppercase tracking-[.18em] text-rose-300/80">Nothing fits</div>
      <div class="text-xl font-semibold">No open-weight models fit ${currentQuant().label} on this card</div></div>
      <div class="text-[12px] text-zinc-400">Try a smaller quant or shorter context.</div>`;
  } else {
    // Fall back to the full list when no mainstream model fits, and label the
    // banner accordingly instead of always claiming "mainstream".
    const usedMainstream = verdictPool.length > 0;
    const top = (usedMainstream ? verdictPool : fitting)[0];
    const verdictLabel = usedMainstream ? "Best mainstream model" : "Best fitting model";
    const v = classifyVerdict(top.pb);
    const yrNet = top.dailySave * 365;
    const accent = {buy:'text-emerald-300/80', maybe:'text-amber-300/80', info:'text-zinc-400', skip:'text-rose-300/80'}[v.tier];
    $("verdict-gpu").className = `rounded-xl border-2 grad-border glow px-5 py-4 verdict-${v.tier} flex items-center justify-between gap-4 flex-wrap`;
    $("verdict-gpu").innerHTML = `
      <div class="flex items-baseline gap-3 flex-wrap">
      <div class="text-[10px] uppercase tracking-[.18em] ${accent}">${verdictLabel}</div>
      <div class="text-2xl font-semibold tracking-tight">${top.r.name}</div>
      <div class="text-[12px] text-zinc-400 mono">${top.r.params_total_b}B · ${top.tps.toFixed(0)} tok/s · ${top.vram_used.toFixed(1)}/${g.vram} GB</div>
      </div>
      <div class="text-right text-[12px] mono">
      <div class="${yrNet > 0 ? 'text-emerald-300' : 'text-rose-300'} font-medium">${yrNet > 0 ? "saves" : "loses"} ${fmtMoney(Math.abs(yrNet))}/yr</div>
      <div class="text-zinc-500">${paybackPhrase(top.pb)} · ${fitting.length} models fit</div>
      </div>`;
  }

  // Models table — sortable + searchable, default newest first
  const sortKey = $("gpu-model-sort")?.value || "created";
  const search = ($("gpu-model-search")?.value || "").toLowerCase().trim();
  const mainstreamCb = $("gpu-mainstream-only");
  const mainstreamOnly = mainstreamCb ? mainstreamCb.checked : true;
  const sortFn = {
    created: (a,b) => (b.r.created || 0) - (a.r.created || 0),
    tps: (a,b) => b.tps - a.tps,
    dailySave: (a,b) => b.dailySave - a.dailySave,
    payback: (a,b) => a.pb - b.pb,
    params: (a,b) => b.r.params_total_b - a.r.params_total_b,
  }[sortKey] || ((a,b) => (b.r.created || 0) - (a.r.created || 0));
  let filtered = fitting;
  if (mainstreamOnly) filtered = filtered.filter(x => isMainstream(x.r));
  if (search) filtered = filtered.filter(x => (x.r.name||"").toLowerCase().includes(search) || x.r.id.toLowerCase().includes(search));
  // Sort a copy: `filtered` may still be the `fitting` array itself.
  const visibleRows = [...filtered].sort(sortFn).slice(0, 30);
  $("gpu-models").innerHTML = filtered.length === 0
    ? `<div class="p-6 text-sm text-zinc-500">No matching models. ${mainstreamOnly ? "Try unchecking 'mainstream only'." : "Try a different filter."}</div>`
    : `<table class="w-full text-sm">
      <thead><tr class="text-zinc-500 text-[10px] uppercase tracking-wider">
      <th class="text-left px-4 py-2">Model</th>
      <th class="text-right px-4 py-2">Params</th>
      <th class="text-right px-4 py-2">VRAM used</th>
      <th class="text-right px-4 py-2">TPS</th>
      <th class="text-right px-4 py-2">API $/Mtok</th>
      <th class="text-right px-4 py-2">Save/day</th>
      <th class="text-right px-4 py-2">Payback</th>
      <th class="text-right px-4 py-2">Verdict</th>
      </tr></thead>
      <tbody class="divide-y divide-white/5">
      ${visibleRows.map(({r, tps, pb, dailySave, vram_used, kv_used, downgraded}) => {
        const v = classifyVerdict(pb);
        const apiP = (apiPerToken(r) || 0) * 1e6;
        const niche = !isMainstream(r);
        // r.created is multiplied by 1000 before being handed to Date.
        const released = r.created ? new Date(r.created * 1000).toLocaleDateString(undefined, {year:"2-digit", month:"short"}) : "";
        return `<tr class="row">
          <td class="px-4 py-2.5"><div class="text-sm truncate max-w-[260px]">${r.name}${niche ? ' <span class="text-[10px] text-zinc-600">niche</span>' : ''}</div><div class="text-[10px] text-zinc-500 mono truncate max-w-[260px]">${r.id}${released ? ` · ${released}` : ""}</div></td>
          <td class="px-4 py-2.5 text-right mono text-zinc-400">${r.params_total_b}B${r.params_active_b !== r.params_total_b ? `<span class="text-zinc-600 text-[10px]"> A${r.params_active_b}B</span>` : ""}</td>
          <td class="px-4 py-2.5 text-right mono text-zinc-400">${vram_used.toFixed(1)}/${g.vram}${downgraded ? ` <span class="pill bg-amber-500/15 text-amber-300/90 text-[9px]" title="Auto-fell-back to ${kv_used} KV to fit.">${kv_used} KV</span>` : ''}</td>
          <td class="px-4 py-2.5 text-right mono">${tps.toFixed(0)}</td>
          <td class="px-4 py-2.5 text-right mono text-zinc-400">$${apiP.toFixed(2)}</td>
          <td class="px-4 py-2.5 text-right mono ${dailySave > 0 ? 'text-emerald-300' : 'text-rose-300'}">${fmtMoney(dailySave)}</td>
          <td class="px-4 py-2.5 text-right mono text-zinc-400">${Number.isFinite(pb) ? fmtDays(pb) : "—"}</td>
          <td class="px-4 py-2.5 text-right"><span class="pill ${pillClass(v.tier)}">${v.label}</span></td>
          </tr>`;
      }).join("")}
      </tbody>
      </table>`;
  $("gpu-models-footer").textContent = `Showing ${Math.min(30, filtered.length)} of ${filtered.length}${mainstreamOnly ? ` mainstream` : ``} models that fit · sorted by ${sortKey}`;

  // Bigger GPUs: count how many models that do not fit here would fit there.
  const bigger = visibleGpus().filter(x => x.vram > g.vram).slice(0, 4);
  $("bigger-gpus").innerHTML = bigger.map(b => {
    const newModelsCount = notFitting.filter(r => fitCheck(r, b, {ctx}).ok).length;
    return `<div class="row px-4 py-3 flex items-center justify-between text-sm">
      <div><div>${b.name}</div><div class="text-[11px] text-zinc-500 mono">${b.vram}GB · ${b.tier} · ${fmtMoney(gpuPrice(b))}</div></div>
      <div class="text-xs mono text-zinc-400">+${newModelsCount} more models would fit</div>
      </div>`;
  }).join("") || `<div class="p-4 text-xs text-zinc-500">This is already a top-tier GPU.</div>`;
}
| // ============================ BROWSE TAB (compact version of original) | |
/**
 * Render the Browse tab: for every catalog model that matches the search box
 * and open-weight toggle, pick the cheapest GPU within the VRAM limit that
 * passes fitCheck at a fixed 8192-token context, then show KPI cards and an
 * expandable per-model list (first 50 rows) sorted by payback ascending.
 *
 * Writes only to the "kpis" and "model-table" elements. Returns nothing.
 */
function renderBrowse() {
  const search = $("search").value.toLowerCase();
  const openOnly = $("open-only").checked;
  const vlim = Number.parseInt($("vram").value, 10);
  const ctx = 8192;

  // Provider fields may be absent; show a bare dash rather than "—%" or a thrown TypeError.
  const fmtPrice = v => {
    const s = v?.toFixed?.(2);
    return s == null ? "—" : `$${s}`;
  };
  const fmtUptime = v => {
    const s = v?.toFixed?.(1);
    return s == null ? "—" : `${s}%`;
  };

  const candidates = DATA
    .filter(r => r.cheapest && r.params_total_b)
    .filter(r => !openOnly || r.open_weight)
    .filter(r => !search || (r.name||"").toLowerCase().includes(search) || r.id.toLowerCase().includes(search))
    .map(r => {
      const eligibleGpus = GPUS
        .filter(g => g.vram <= vlim && fitCheck(r, g, {ctx}).ok)
        .sort((a,b) => gpuPrice(a) - gpuPrice(b));
      const gpu = eligibleGpus[0];
      if (!gpu) return null;
      const pb = paybackDays(r, gpu);
      const dailySave = dailyTcoSavings(r, gpu);
      return { r, gpu, pb, dailySave };
    })
    .filter(Boolean)
    .sort((a,b) => a.pb - b.pb);

  $("kpis").innerHTML = `
    <div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
    <div class="text-xs uppercase tracking-wider text-zinc-500">Models matching</div>
    <div class="mt-1 text-2xl font-semibold">${candidates.length}</div>
    </div>
    <div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
    <div class="text-xs uppercase tracking-wider text-zinc-500">Best payback</div>
    <div class="mt-1 text-2xl font-semibold">${candidates[0] ? fmtDays(candidates[0].pb) : "—"}</div>
    <div class="text-xs text-zinc-500 mt-1 truncate">${candidates[0] ? candidates[0].r.name + " on " + candidates[0].gpu.name : ""}</div>
    </div>
    <div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
    <div class="text-xs uppercase tracking-wider text-zinc-500">Daily usage</div>
    <div class="mt-1 text-2xl font-semibold mono">${fmtTok(tpd())}</div>
    <div class="text-xs text-zinc-500 mt-1">tok/day</div>
    </div>`;

  $("model-table").innerHTML = candidates.slice(0, 50).map(c => {
    const v = classifyVerdict(c.pb);
    return `<details class="row">
      <summary class="px-4 py-3 grid grid-cols-12 gap-3 items-center">
      <div class="col-span-5 min-w-0">
      <div class="text-sm truncate">${c.r.name}</div>
      <div class="text-[11px] text-zinc-500 mono truncate">${c.r.id} · ${c.r.params_total_b}B · ${c.r.providers.length} providers</div>
      </div>
      <div class="col-span-2 text-xs mono"><div>${c.gpu.name}</div><div class="text-zinc-500">${fmtMoney(gpuPrice(c.gpu))}</div></div>
      <div class="col-span-2 text-xs mono"><span class="text-zinc-500">save/d</span> <span class="${c.dailySave>0?'text-emerald-300':'text-rose-300'}">${fmtMoney(c.dailySave)}</span></div>
      <div class="col-span-2 text-xs mono"><span class="text-zinc-500">payback</span> ${fmtDays(c.pb)}</div>
      <div class="col-span-1 text-right"><span class="pill ${pillClass(v.tier)}">${v.label}</span></div>
      </summary>
      <div class="px-4 py-3 bg-black/20 text-xs">
      <table class="w-full mono">
      <thead><tr class="text-zinc-500"><th class="text-left py-1">Provider</th><th class="text-left py-1">Quant</th><th class="text-right py-1">$/M in</th><th class="text-right py-1">$/M out</th><th class="text-right py-1">TPS</th><th class="text-right py-1">Up 1d</th></tr></thead>
      <tbody>${c.r.providers.map(p => `<tr><td class="py-0.5">${p.provider}</td><td class="text-zinc-500">${p.quantization || "?"}</td><td class="text-right">${fmtPrice(p.prompt_per_mtok)}</td><td class="text-right">${fmtPrice(p.completion_per_mtok)}</td><td class="text-right">${p.throughput_tps?.toFixed?.(0) ?? "—"}</td><td class="text-right">${fmtUptime(p.uptime_1d)}</td></tr>`).join("")}</tbody>
      </table>
      </div>
      </details>`;
  }).join("");
}
| // ============================ wiring | |
/**
 * Refresh every value label, then re-render only the active tab.
 * Any S.tab value other than "model" or "gpu" falls through to the browse view.
 */
function rerender() {
  renderAllValueLabels();
  switch (S.tab) {
    case "model":
      renderModelTab();
      break;
    case "gpu":
      renderGpuTab();
      break;
    default:
      renderBrowse();
  }
}
/**
 * Wire a group of mutually exclusive toggle buttons.
 *
 * On click, the clicked button is flagged via data-active (all others are
 * cleared), the button's data-<key> value — when non-empty — is copied into
 * S[key], the optional callback runs with the button, and the UI re-renders.
 *
 * @param {string} selector - CSS selector matching every button in the group.
 * @param {string} [key] - Name shared by the dataset entry and the S property.
 * @param {(button: Element) => void} [onChange] - Extra hook run before re-rendering.
 */
function bindToggleGroup(selector, key, onChange) {
  const handleClick = (clicked) => {
    // Re-query on each click, as the group is looked up at click time.
    for (const candidate of document.querySelectorAll(selector)) {
      candidate.dataset.active = candidate === clicked;
    }
    if (key) {
      const chosen = clicked.dataset[key];
      if (chosen) S[key] = chosen;
    }
    if (onChange) {
      onChange(clicked);
    }
    rerender();
  };

  for (const button of document.querySelectorAll(selector)) {
    button.addEventListener("click", () => handleClick(button));
  }
}
/**
 * Wire a set of tokens-per-day preset buttons.
 *
 * Clicking a preset marks it active within its group, copies its data-tpd
 * value into the given slider and into all three tpd sliders (so every tab
 * shows the same value), then re-renders.
 *
 * @param {string} btnClass - CSS selector for the preset buttons.
 * @param {string} sliderId - Id of the slider paired with this button group.
 */
function setupTpdPresets(btnClass, sliderId) {
  const groupButtons = () => document.querySelectorAll(btnClass);

  const applyPreset = (chosen) => {
    groupButtons().forEach(btn => { btn.dataset.active = btn === chosen; });
    // The paired slider first, then all three sliders so switching tabs feels coherent.
    for (const id of [sliderId, "tpd", "tpd2", "tpd3"]) {
      $(id).value = chosen.dataset.tpd;
    }
    rerender();
  };

  groupButtons().forEach(btn => {
    btn.addEventListener("click", () => applyPreset(btn));
  });
}
/**
 * Wire a set of context-length preset buttons.
 *
 * Clicking a preset marks it active, then writes a slider value to `sliderId`
 * (and `otherSliderId` when given) and re-renders. A preset whose data-ctx is
 * "max" writes log2 of the selected model's `context` (256000 when no model
 * or no context is available); any other preset writes its data-ctx verbatim.
 *
 * @param {string} btnClass - CSS selector for the preset buttons.
 * @param {string} sliderId - Id of the slider paired with this button group.
 * @param {string} [otherSliderId] - Id of a second slider to keep in sync.
 */
function setupCtxPresets(btnClass, sliderId, otherSliderId) {
  const sliderValueFor = (button) => {
    if (button.dataset.ctx !== "max") return button.dataset.ctx;
    // pick model max if available
    const selected = DATA.find(r => r.id === S.modelId);
    const maxContext = selected && selected.context ? selected.context : 256000;
    return Math.log2(maxContext);
  };

  const onPresetClick = (chosen) => {
    for (const btn of document.querySelectorAll(btnClass)) {
      btn.dataset.active = btn === chosen;
    }
    const next = sliderValueFor(chosen);
    $(sliderId).value = next;
    if (otherSliderId) $(otherSliderId).value = next;
    rerender();
  };

  for (const btn of document.querySelectorAll(btnClass)) {
    btn.addEventListener("click", () => onPresetClick(btn));
  }
}
| // ============================ USAGE TAB | |
| // Defensive parser: accepts whatever shape OpenRouter returns. | |
| // Looks for an array with per-row {model, usage/cost, tokens, requests}. | |
/**
 * Normalise an OpenRouter usage payload into per-model totals.
 *
 * Accepts a bare array of rows, an object with a `data` array, or the combined
 * shape {analytics, credits, auth, fetched_at}. When none of those yields an
 * array, the first nested array of objects found by findFirstArrayLike is used.
 *
 * Rows without a model identifier, and rows that are not objects, are dropped.
 * Cost and token fields that cannot be parsed count as 0 so one bad row cannot
 * turn the totals into NaN.
 *
 * @param {*} raw - Parsed JSON payload.
 * @returns {{rows: Array<object>, raw_rows: number, span: ?{from: string, to: string},
 *            credits: *, auth: *, fetched_at: string} | null}
 *          Aggregated usage sorted by cost descending, or null when no row array is found.
 */
function normalizeUsage(raw) {
  if (!raw) return null;

  // Accept our combined shape {analytics, credits, auth, fetched_at}
  let analytics = raw.analytics || raw;
  if (analytics?.data) analytics = analytics.data;
  if (!Array.isArray(analytics)) {
    // Search nested objects for first array
    const found = findFirstArrayLike(raw);
    if (found) analytics = found;
    else return null;
  }

  const toFloat = (v) => {
    const n = Number.parseFloat(v);
    return Number.isFinite(n) ? n : 0;
  };
  const toInt = (v) => {
    const n = Number.parseInt(v, 10);
    return Number.isFinite(n) ? n : 0;
  };
  // Dates are sliced and sorted as strings below, so numeric timestamps must be
  // converted first. NOTE(review): values below 1e12 are assumed to be epoch
  // seconds and larger ones epoch milliseconds — confirm against real payloads.
  const toDateString = (v) => {
    if (!v) return null;
    if (typeof v === "number") {
      const d = new Date(v < 1e12 ? v * 1000 : v);
      return Number.isNaN(d.getTime()) ? null : d.toISOString();
    }
    return String(v);
  };

  const rows = analytics.map(r => {
    if (!r || typeof r !== "object") return null;
    const id = r.model_permaslug || r.model || r.endpoint || r.name || r.permaslug;
    if (!id) return null;
    return {
      id: String(id),
      cost: toFloat(r.usage ?? r.cost ?? r.usage_in_credits ?? r.amount ?? 0),
      inTok: toInt(r.prompt_tokens ?? r.input_tokens ?? r.tokens_prompt ?? 0),
      outTok: toInt(r.completion_tokens ?? r.output_tokens ?? r.tokens_completion ?? 0),
      reqs: toInt(r.requests ?? r.count ?? 0),
      date: toDateString(r.date || r.day || r.created_at),
    };
  }).filter(Boolean);

  // Aggregate by model
  const byModel = new Map();
  for (const r of rows) {
    const cur = byModel.get(r.id) || {id: r.id, cost: 0, inTok: 0, outTok: 0, reqs: 0, days: new Set()};
    cur.cost += r.cost;
    cur.inTok += r.inTok;
    cur.outTok += r.outTok;
    cur.reqs += r.reqs;
    if (r.date) cur.days.add(r.date.slice(0, 10));
    byModel.set(r.id, cur);
  }
  // `days` becomes the number of distinct calendar days seen for the model.
  const aggregated = [...byModel.values()]
    .map(m => ({...m, days: m.days.size}))
    .sort((a, b) => b.cost - a.cost);

  const dates = rows.map(r => r.date).filter(Boolean).sort();
  return {
    rows: aggregated,
    raw_rows: rows.length,
    span: dates.length ? {from: dates[0].slice(0, 10), to: dates[dates.length - 1].slice(0, 10)} : null,
    credits: raw.credits?.data || raw.credits || null,
    auth: raw.auth?.data || raw.auth || null,
    fetched_at: raw.fetched_at || new Date().toISOString(),
  };
}
/**
 * Depth-first search for the first non-empty array whose first element is a
 * (non-null) object.
 *
 * @param {*} o - Value to search; arrays and plain objects are descended into.
 * @param {number} [depth=0] - Current nesting level; values deeper than 4 are ignored.
 * @returns {Array<object>|null} The matching array, or null when none is found.
 */
function findFirstArrayLike(o, depth = 0) {
  if (depth > 4 || !o) return null;
  // `typeof null` is "object", so null has to be excluded explicitly.
  if (Array.isArray(o) && o.length && o[0] !== null && typeof o[0] === "object") return o;
  if (typeof o === "object") {
    for (const v of Object.values(o)) {
      const f = findFirstArrayLike(v, depth + 1);
      if (f) return f;
    }
  }
  return null;
}
/** Save the current usage snapshot (S.usage) to localStorage under "or_usage_v1". */
function persistUsage() {
  const serialized = JSON.stringify(S.usage);
  localStorage.setItem("or_usage_v1", serialized);
}
/**
 * Fetch analytics, credits and key info from openrouter.ai in parallel.
 *
 * Each endpoint is best-effort: a failed request (network error, non-2xx
 * status, or unreadable JSON) yields `{error: message}` in its slot instead
 * of rejecting the whole call.
 *
 * @param {string} apiKey - OpenRouter API key, sent as a Bearer token.
 * @returns {Promise<{analytics: *, credits: *, auth: *, fetched_at: string}>}
 */
async function fetchUsage(apiKey) {
  const headers = {"Authorization": `Bearer ${apiKey}`, "Content-Type": "application/json"};

  async function getJson(path) {
    const response = await fetch(`https://openrouter.ai${path}`, {headers});
    if (!response.ok) throw new Error(`${path}: HTTP ${response.status}`);
    return response.json();
  }

  async function getJsonOrError(path) {
    try {
      return await getJson(path);
    } catch (err) {
      return {error: err.message};
    }
  }

  const endpoints = ["/api/v1/analytics", "/api/v1/credits", "/api/v1/auth/key"];
  const [analytics, credits, auth] = await Promise.all(endpoints.map(getJsonOrError));
  return {analytics, credits, auth, fetched_at: new Date().toISOString()};
}
// Source of the bookmarklet, as a `javascript:` URL. Using relative paths and
// `credentials:'include'`, it fetches the analytics, credits and key endpoints,
// copies the combined JSON to the clipboard, and reports the outcome via alert().
// Split into one segment per statement for readability; the joined string is a
// single line with no separators.
const BOOKMARKLET_JS = [
  "javascript:(async()=>{try{",
  "const j=async p=>(await fetch(p,{credentials:'include'})).json();",
  "const [a,c,k]=await Promise.all([j('/api/v1/analytics'),j('/api/v1/credits'),j('/api/v1/auth/key')]);",
  "const blob=JSON.stringify({analytics:a,credits:c,auth:k,fetched_at:new Date().toISOString()});",
  "await navigator.clipboard.writeText(blob);",
  "alert('OpenRouter usage copied to clipboard ('+blob.length+' bytes). Paste it into the calculator.');",
  "}catch(e){alert('Failed: '+e.message);}",
  "})();",
].join("");
/**
 * Parse pasted/dropped JSON text, normalise it, and make it the current usage
 * snapshot (stored in S.usage, persisted, and rendered).
 *
 * @param {string} text - Raw JSON text.
 * @returns {boolean} true when usage rows were loaded; false when the text is
 *          not valid JSON or contains no usage rows (an error status is shown).
 */
function loadUsageFromText(text) {
  let payload;
  try {
    payload = JSON.parse(text);
  } catch (err) {
    setUsageStatus("Invalid JSON: " + err.message, true);
    return false;
  }

  const usage = normalizeUsage(payload);
  if (!usage || !usage.rows.length) {
    setUsageStatus("Couldn't find usage rows in that JSON.", true);
    return false;
  }

  S.usage = usage;
  persistUsage();

  const spanText = usage.span ? `${usage.span.from} → ${usage.span.to}` : "no date span";
  setUsageStatus(`Loaded ${usage.rows.length} models from ${usage.raw_rows} raw rows · ${spanText}`);
  renderUsageTab();
  return true;
}
/**
 * Show a status line in the "usage-status" element, if it exists.
 *
 * @param {string} msg - Text to display.
 * @param {boolean} [isError=false] - true colours the text rose, false emerald.
 */
function setUsageStatus(msg, isError = false) {
  const statusEl = $("usage-status");
  if (!statusEl) return;

  const tone = isError ? "text-rose-300" : "text-emerald-300";
  statusEl.textContent = msg;
  statusEl.className = "text-xs " + tone;
}
/**
 * Render the Usage tab from the snapshot in S.usage.
 *
 * With no snapshot, only the empty-state element is shown. Otherwise each
 * usage row is matched to a catalog model, spend is split into open-weight /
 * closed / unknown, and the cost of running the open-weight share locally is
 * estimated for the GPU named by S.gpuName (GPUS[2] when unknown).
 *
 * Payback is purchase price divided by the daily net operating saving
 * (daily API spend minus daily power cost); it is Infinity when that saving
 * is not positive. Writes only to DOM elements. Returns nothing.
 */
function renderUsageTab() {
  const u = S.usage;
  const empty = $("usage-empty"), kpis = $("usage-kpis"), split = $("usage-split"), rowsCard = $("usage-rows");
  if (!u) {
    empty.classList.remove("hidden");
    kpis.classList.add("hidden"); split.classList.add("hidden"); rowsCard.classList.add("hidden");
    return;
  }
  empty.classList.add("hidden");
  kpis.classList.remove("hidden"); split.classList.remove("hidden"); rowsCard.classList.remove("hidden");

  // Match each usage row to DATA: exact id first, then a prefix match on the
  // part of the usage id before the first ":".
  const matched = u.rows.map(r => {
    const baseId = String(r.id).split(":")[0];
    const m = DATA.find(d => d.id === r.id) || DATA.find(d => d.id.startsWith(baseId));
    return {...r, model: m, open: m ? m.open_weight : null};
  });
  const sumOf = (list, field) => list.reduce((s, r) => s + r[field], 0);
  const totalCost = sumOf(matched, "cost");
  const openRows = matched.filter(r => r.open === true);
  const closedRows = matched.filter(r => r.open === false);
  const unknownRows = matched.filter(r => r.open === null);
  const openCost = sumOf(openRows, "cost");
  const closedCost = sumOf(closedRows, "cost");
  const unknownCost = sumOf(unknownRows, "cost");
  const openOutTok = sumOf(openRows, "outTok");
  const closedOutTok = sumOf(closedRows, "outTok");

  const span = u.span ? `${u.span.from} → ${u.span.to}` : "all time";
  // Inclusive day count of the span; 30 when there is no span or its dates do not parse.
  const spanDays = u.span ? Math.round((new Date(u.span.to) - new Date(u.span.from)) / 86400000) + 1 : 30;
  const days = Number.isFinite(spanDays) ? Math.max(1, spanDays) : 30;

  // KPIs
  kpis.innerHTML = `
    <div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
    <div class="text-xs uppercase tracking-wider text-zinc-500">Total spend</div>
    <div class="mt-1 text-2xl font-semibold mono">${fmtMoney(totalCost)}</div>
    <div class="text-xs text-zinc-500 mt-1">${span} · ${days}d · ${matched.length} models</div>
    </div>
    <div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
    <div class="text-xs uppercase tracking-wider text-zinc-500">Avg daily output tokens</div>
    <div class="mt-1 text-2xl font-semibold mono">${fmtTok(openOutTok / days + closedOutTok / days)}</div>
    <div class="text-xs text-zinc-500 mt-1">used to compute payback</div>
    </div>
    <div class="rounded-xl bg-zinc-900/40 grad-border glow p-4">
    <div class="text-xs uppercase tracking-wider text-zinc-500">Snapshot</div>
    <div class="mt-1 text-2xl font-semibold mono">${new Date(u.fetched_at).toLocaleDateString()}</div>
    <div class="text-xs text-zinc-500 mt-1">refresh anytime</div>
    </div>`;

  // Open/closed split
  const g = GPUS.find(x => x.name === S.gpuName) || GPUS[2];
  const price = gpuPrice(g);
  const powerPerDay = dailyPower(g);
  // Daily local TCO = purchase price amortised over the ownership period + power.
  // Closed models cannot run locally, so their cost stays as-is.
  const dailyLocal = (price / (years() * 365)) + powerPerDay;
  const localOpenTotalCost = dailyLocal * days;
  const paybackFor = (dailyApiSpend) => {
    const netPerDay = dailyApiSpend - powerPerDay;
    return netPerDay > 0 ? price / netPerDay : Infinity;
  };
  const openPayback = paybackFor(openCost / days);
  split.innerHTML = `
    <div class="rounded-2xl border-2 grad-border glow p-5 verdict-${openCost > localOpenTotalCost ? 'buy' : 'maybe'}">
    <div class="text-[11px] uppercase tracking-[.18em] ${openCost > localOpenTotalCost ? 'text-emerald-300/80' : 'text-amber-300/80'}">Open-weight · could move local</div>
    <div class="text-2xl font-semibold mt-1 mono">${fmtMoney(openCost)}</div>
    <div class="text-xs text-zinc-400 mt-2">${openRows.length} models · ${fmtTok(openOutTok)} output tokens</div>
    <div class="text-[11px] text-zinc-500 mt-3 leading-relaxed">
    At ${days}-day rate, your <span class="text-zinc-300">${g.name}</span> (${fmtMoney(price)}) would cost <span class="text-zinc-300">${fmtMoney(localOpenTotalCost)}</span> in TCO — break-even <span class="text-zinc-300">${fmtDays(openPayback)}</span>.
    </div>
    </div>
    <div class="rounded-2xl bg-zinc-900/40 grad-border glow p-5">
    <div class="text-[11px] uppercase tracking-[.18em] text-zinc-500">Closed · stays on API</div>
    <div class="text-2xl font-semibold mt-1 mono">${fmtMoney(closedCost)}</div>
    <div class="text-xs text-zinc-400 mt-2">${closedRows.length} closed-weight models${unknownRows.length ? ` · ${unknownRows.length} unknown ($${unknownCost.toFixed(2)})` : ""}</div>
    <div class="text-[11px] text-zinc-500 mt-3 leading-relaxed">No local equivalent without quality trade-off — this cost stays regardless of which GPU you buy.</div>
    </div>`;

  // Per-model rows (top 30 by spend)
  $("usage-gpu-pick").textContent = `local cost computed for ${g.name} (change in 'Evaluate a GPU' tab)`;
  const sorted = [...matched].sort((a,b) => b.cost - a.cost).slice(0, 30);
  $("usage-rows-body").innerHTML = sorted.map(r => {
    let rightCol;
    if (r.open === true && r.model && r.model.params_total_b) {
      const fit = fitCheck(r.model, g, {ctx: 8192});
      if (fit.ok) {
        const tps = tpsFor(r.model, g);
        const dailyApi = r.cost / days;
        const saving = dailyApi - dailyLocal;
        // Only rate the purchase when this model alone covers the full daily TCO.
        const v = classifyVerdict(saving > 0 ? paybackFor(dailyApi) : Infinity);
        rightCol = `
          <div class="text-xs mono"><span class="text-zinc-500">local TCO/d</span> ${fmtMoney(dailyLocal)}</div>
          <div class="text-xs mono"><span class="text-zinc-500">save/d</span> <span class="${saving>0?'text-emerald-300':'text-rose-300'}">${fmtMoney(saving)}</span></div>
          <div class="text-xs mono"><span class="text-zinc-500">tps</span> ${tps.toFixed(0)}</div>
          <div class="text-right"><span class="pill ${pillClass(v.tier)}">${v.label}</span></div>`;
      } else {
        rightCol = `<div class="col-span-4 text-xs text-amber-300/80">${fit.detail}</div>`;
      }
    } else if (r.open === false) {
      rightCol = `<div class="col-span-4 text-xs text-zinc-500">closed · stays on API</div>`;
    } else {
      rightCol = `<div class="col-span-4 text-xs text-zinc-500">unknown model · not in catalog</div>`;
    }
    return `<div class="row px-4 py-3 grid grid-cols-12 gap-2 items-center">
      <div class="col-span-5 min-w-0">
      <div class="text-sm truncate">${r.model?.name || r.id}</div>
      <div class="text-[11px] text-zinc-500 mono truncate">${r.id}${r.open === true ? " · open" : r.open === false ? " · closed" : ""}</div>
      </div>
      <div class="col-span-3 text-xs mono">
      <div><span class="text-zinc-500">spent</span> ${fmtMoney(r.cost)}</div>
      <div class="text-zinc-500">${fmtTok(r.outTok)} out · ${fmtTok(r.inTok)} in${r.reqs ? " · " + r.reqs + " reqs" : ""}</div>
      </div>
      ${rightCol}
      </div>`;
  }).join("");
}
| function init() { | |
| // Engine dropdown | |
| const sel = $("engine"); | |
| sel.innerHTML = ENGINES.engines.map(e => `<option value="${e.name}">${e.name} · ${e.single_stream}× / ${e.batched}× batched</option>`).join(""); | |
| sel.value = "llama.cpp"; | |
| sel.addEventListener("change", () => { S.engine = sel.value; rerender(); }); | |
| // Quant dropdown | |
| const qsel = $("quant"); | |
| qsel.innerHTML = QUANTS.map(q => `<option value="${q.id}">${q.label} · ${q.format}</option>`).join(""); | |
| qsel.value = "Q4_K_M"; | |
| qsel.addEventListener("change", () => { S.quant = qsel.value; rerender(); }); | |
| // Tabs | |
| document.querySelectorAll(".tab-btn").forEach(b => b.addEventListener("click", () => setTab(b.dataset.tab))); | |
| // Toggle groups | |
| bindToggleGroup(".mode-btn", "mode"); | |
| bindToggleGroup(".price-btn", "price"); | |
| bindToggleGroup(".conc-btn", "conc"); | |
| bindToggleGroup(".kv-btn", "kvQuant", b => { S.kvQuant = b.dataset.kv; localStorage.setItem("kv_quant", S.kvQuant); }); | |
| // Sliders | |
| ["tpd","tpd2","tpd3","io","years","util","kwh","spec","vram","search","open-only","ctx","ctx2","mintps"].forEach(id => { | |
| const el = $(id); | |
| if (!el) return; | |
| el.addEventListener("input", () => { | |
| if (id.startsWith("tpd")) { | |
| $("tpd").value = $("tpd2").value = $("tpd3").value = el.value; | |
| document.querySelectorAll(".preset-btn,.preset-btn2").forEach(b => b.dataset.active = false); | |
| } | |
| if (id === "ctx" || id === "ctx2") { | |
| $("ctx").value = $("ctx2").value = el.value; | |
| document.querySelectorAll(".ctx-btn,.ctx-btn2").forEach(b => b.dataset.active = false); | |
| } | |
| rerender(); | |
| }); | |
| }); | |
| setupTpdPresets(".preset-btn", "tpd"); | |
| setupTpdPresets(".preset-btn2", "tpd2"); | |
| setupCtxPresets(".ctx-btn", "ctx", "ctx2"); | |
| setupCtxPresets(".ctx-btn2", "ctx2", "ctx"); | |
| // GPU tab headline price input | |
| $("gpu-price-headline").addEventListener("input", e => { | |
| const v = parseFloat(e.target.value); | |
| if (v > 0) S.priceOverrides[S.gpuName] = v; else delete S.priceOverrides[S.gpuName]; | |
| persistOverrides(); | |
| rerender(); | |
| }); | |
| document.querySelectorAll(".gpu-priceset-btn").forEach(b => b.addEventListener("click", () => { | |
| const g = GPUS.find(x => x.name === S.gpuName); | |
| if (!g) return; | |
| const action = b.dataset.priceset; | |
| if (action === "reset") delete S.priceOverrides[g.name]; | |
| else S.priceOverrides[g.name] = g[action]; | |
| persistOverrides(); | |
| rerender(); | |
| })); | |
| // "tune ↓" → open the tune drawer and scroll into view | |
| $("open-tune")?.addEventListener("click", () => { | |
| const d = document.querySelector("details > summary > span"); | |
| const dd = d?.closest("details"); | |
| if (dd) { dd.open = true; dd.scrollIntoView({behavior: "smooth", block: "center"}); } | |
| }); | |
| // GPU table sort headers | |
| document.querySelectorAll(".sort-th").forEach(th => th.addEventListener("click", () => { | |
| S.sortGpu = th.dataset.sort; | |
| rerender(); | |
| })); | |
| // Show GPUs that don't fit | |
| $("show-fails")?.addEventListener("change", rerender); | |
| // GPU-tab model filters | |
| ["gpu-model-search", "gpu-model-sort", "gpu-mainstream-only"].forEach(id => { | |
| $(id)?.addEventListener("input", rerender); | |
| $(id)?.addEventListener("change", rerender); | |
| }); | |
| // Include data-center toggle | |
| const dcInput = $("include-dc"); | |
| if (dcInput) { | |
| dcInput.checked = S.includeDC; | |
| dcInput.addEventListener("change", () => { | |
| S.includeDC = dcInput.checked; | |
| persistDC(); | |
| // Also rebuild the GPU dropdown to reflect new visible set | |
| const sel = $("gpu-select"); | |
| if (sel) { | |
| sel.innerHTML = visibleGpus().map(g => `<option value="${g.name}">${g.name} · ${g.vram}GB · ${g.tier}</option>`).join(""); | |
| if (!visibleGpus().find(g => g.name === S.gpuName)) S.gpuName = visibleGpus()[2]?.name || visibleGpus()[0]?.name; | |
| sel.value = S.gpuName; | |
| } | |
| rerender(); | |
| }); | |
| } | |
| // Budget input + presets | |
| const budgetInput = $("budget"); | |
| budgetInput.value = S.budget; | |
| budgetInput.addEventListener("input", () => { | |
| const v = parseFloat(budgetInput.value); | |
| if (v > 0) { S.budget = v; persistBudget(); } | |
| document.querySelectorAll(".budget-btn").forEach(b => b.dataset.active = parseFloat(b.dataset.budget) === S.budget); | |
| rerender(); | |
| }); | |
| document.querySelectorAll(".budget-btn").forEach(b => b.addEventListener("click", () => { | |
| document.querySelectorAll(".budget-btn").forEach(x => x.dataset.active = x === b); | |
| S.budget = parseFloat(b.dataset.budget); | |
| persistBudget(); | |
| budgetInput.value = S.budget; | |
| rerender(); | |
| })); | |
| setupModelCombobox(); | |
| setupGpuSelect(); | |
| // ── Usage tab wiring ── | |
| // Sub-tabs | |
| document.querySelectorAll(".utab-btn").forEach(b => b.addEventListener("click", () => { | |
| document.querySelectorAll(".utab-btn").forEach(x => x.dataset.active = x === b); | |
| document.querySelectorAll("[data-upane]").forEach(p => p.classList.toggle("hidden", p.dataset.upane !== b.dataset.utab)); | |
| })); | |
| // ① Paste / drop | |
| $("copy-curl").addEventListener("click", async () => { | |
| await navigator.clipboard.writeText($("curl-snippet").textContent); | |
| setUsageStatus("curl command copied to clipboard."); | |
| }); | |
| $("usage-load").addEventListener("click", () => loadUsageFromText($("usage-paste").value)); | |
| $("usage-pickfile").addEventListener("click", () => $("usage-file").click()); | |
| $("usage-file").addEventListener("change", async e => { | |
| const f = e.target.files?.[0]; if (!f) return; | |
| const text = await f.text(); | |
| if (loadUsageFromText(text)) $("usage-paste").value = text.slice(0, 5000); | |
| }); | |
| // Drop zone — entire usage card | |
| document.querySelector('[data-pane="usage"]').addEventListener("dragover", e => { e.preventDefault(); }); | |
| document.querySelector('[data-pane="usage"]').addEventListener("drop", async e => { | |
| e.preventDefault(); | |
| const f = e.dataTransfer?.files?.[0]; if (!f) return; | |
| loadUsageFromText(await f.text()); | |
| }); | |
| // ② Connect | |
| const savedKey = localStorage.getItem("or_api_key") || ""; | |
| if (savedKey) $("api-key").value = savedKey; | |
| $("api-fetch").addEventListener("click", async () => { | |
| const key = $("api-key").value.trim(); | |
| if (!key) { setUsageStatus("Enter an API key first.", true); return; } | |
| localStorage.setItem("or_api_key", key); | |
| setUsageStatus("Fetching from openrouter.ai…"); | |
| $("api-status").textContent = "Calling /credits, /auth/key, /analytics …"; | |
| try { | |
| const raw = await fetchUsage(key); | |
| $("api-status").textContent = `analytics: ${raw.analytics?.error ? "❌ "+raw.analytics.error : "✓"} · credits: ${raw.credits?.error ? "❌" : "✓"} · auth: ${raw.auth?.error ? "❌" : "✓"}`; | |
| const norm = normalizeUsage(raw); | |
| if (!norm || !norm.rows.length) { setUsageStatus("Fetched, but no usage rows found in response.", true); return; } | |
| S.usage = norm; persistUsage(); | |
| setUsageStatus(`Loaded ${norm.rows.length} models from your account.`); | |
| renderUsageTab(); | |
| } catch (e) { setUsageStatus("Fetch failed: " + e.message, true); } | |
| }); | |
| $("api-clear").addEventListener("click", () => { | |
| localStorage.removeItem("or_api_key"); | |
| $("api-key").value = ""; | |
| setUsageStatus("Key cleared from this browser."); | |
| }); | |
| // ③ Bookmarklet | |
| $("bookmarklet").href = BOOKMARKLET_JS; | |
| $("bm-load").addEventListener("click", () => loadUsageFromText($("bm-paste").value)); | |
| // Delegated handlers for inline-editable inputs (re-rendered on every state change) | |
| document.addEventListener("input", e => { | |
| const t = e.target; | |
| if (t.matches?.("[data-input-gpu-price]")) { | |
| const name = t.dataset.inputGpuPrice; | |
| const v = parseFloat(t.value); | |
| if (v > 0) S.priceOverrides[name] = v; else delete S.priceOverrides[name]; | |
| persistOverrides(); | |
| rerender(); | |
| } else if (t.matches?.("[data-input-kwh]")) { $("kwh").value = parseFloat(t.value) || 0; rerender(); | |
| } else if (t.matches?.("[data-input-util]")) { $("util").value = (parseFloat(t.value) || 0) / 100; rerender(); | |
| } else if (t.matches?.("[data-input-years]")) { $("years").value = parseFloat(t.value) || 1; rerender(); | |
| } else if (t.matches?.("[data-input-tpd]")) { | |
| const exp = Math.log10(Math.max(1, parseFloat(t.value) || 1)); | |
| $("tpd").value = $("tpd2").value = $("tpd3").value = exp; | |
| rerender(); | |
| } | |
| }); | |
| // click-to-edit on inline price spans (verdict + cards) | |
| document.addEventListener("click", e => { | |
| const t = e.target.closest("[data-edit-gpu-price]"); | |
| if (!t || t.tagName === "INPUT") return; | |
| const name = t.dataset.editGpuPrice; | |
| const cur = S.priceOverrides[name] ?? GPUS.find(g => g.name === name)?.[S.price] ?? 0; | |
| const v = prompt(`Set custom price for ${name} (USD). Leave blank to reset.`, cur); | |
| if (v === null) return; | |
| if (v.trim() === "") delete S.priceOverrides[name]; | |
| else { const n = parseFloat(v); if (n > 0) S.priceOverrides[name] = n; } | |
| persistOverrides(); | |
| rerender(); | |
| }); | |
| // reset button for individual GPU | |
| document.addEventListener("click", e => { | |
| const t = e.target.closest("[data-clear-price]"); | |
| if (!t) return; | |
| delete S.priceOverrides[t.dataset.clearPrice]; | |
| persistOverrides(); | |
| rerender(); | |
| }); | |
| // "Already own a GPU" jump | |
| document.addEventListener("click", e => { | |
| if (e.target.closest("#already-own-btn")) setTab("gpu"); | |
| }); | |
| renderAllValueLabels(); | |
| setTab("model"); | |
| } | |
| init(); | |
| </script> | |
| </body></html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment