Skip to content

Instantly share code, notes, and snippets.

@Jachimo
Created March 30, 2026 18:30
Show Gist options
  • Select an option

  • Save Jachimo/2e53ed52086516e8efc7be0d4a64ccc2 to your computer and use it in GitHub Desktop.

Select an option

Save Jachimo/2e53ed52086516e8efc7be0d4a64ccc2 to your computer and use it in GitHub Desktop.
Helper scripts for updating venice.json config
#!/usr/bin/env bash
# Download the current Venice AI model catalog and pretty-print it into
# venice-models.json for consumption by update-venice.py.
set -euo pipefail
curl -s https://api.venice.ai/api/v1/models | jq '.' > venice-models.json
#!/usr/bin/env python3
"""
update-venice.py: Sync venice.json with the Venice AI models API response.
Usage:
python3 update-venice.py [options]
Options:
--api-file FILE API response file (default: venice-models.json)
--config-file FILE Config file to update (default: venice.json)
--dry-run Show changes without writing anything
--include-e2ee Include e2ee-* encrypted models (excluded by default)
--remove-missing Remove models from config that are absent from the API
For existing models, only context_window and pricing are updated (all other
fields such as can_reason, reasoning_levels, options, etc. are preserved as-is,
since they may have been manually curated).
For new models, all fields are inferred from API capabilities with sensible defaults.
"""
import argparse
import json
import sys
from pathlib import Path
def load_json(path: str) -> dict:
    """Read *path* and return the parsed JSON object.

    The file is decoded as UTF-8 explicitly: JSON documents are UTF-8 by
    specification, and relying on the locale default (the previous
    behavior) breaks on non-ASCII model names under e.g. a C locale.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)
def save_json(path: str, data: dict) -> None:
    """Serialize *data* to *path* as pretty-printed JSON.

    Writes UTF-8 explicitly (instead of the locale default) and keeps
    non-ASCII characters readable via ``ensure_ascii=False``.  Output uses
    2-space indentation plus a trailing newline so the file diffs cleanly
    under version control.
    """
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
        f.write("\n")
def migrate_providers_map(current: object) -> dict[str, dict]:
    """Normalize an existing ``providers`` value into an id-keyed map.

    A dict input is kept as-is minus any non-dict values; a list input is
    re-keyed by each entry's ``id`` field (entries without an ``id`` are
    dropped).  Any other input type yields an empty map.
    """
    result: dict[str, dict] = {}
    if isinstance(current, dict):
        result.update(
            (name, value)
            for name, value in current.items()
            if isinstance(value, dict)
        )
    elif isinstance(current, list):
        for item in current:
            if isinstance(item, dict) and item.get("id"):
                result[item["id"]] = item
    return result
def migrate_providers_list(current: object) -> list[dict]:
    """Normalize an existing ``providers`` value into a list of entries.

    The caller (``sync_crush_models``) appends a freshly built ``venice``
    provider block after this call, so any existing venice entry must be
    dropped here.  The list branch already did that; the dict branch
    previously did not, which duplicated the venice provider on a
    map-to-list migration.  Both branches now filter it out, matching on
    either the entry's ``id`` field or (for maps) its key.
    """
    providers_list: list[dict] = []
    if isinstance(current, list):
        for entry in current:
            if isinstance(entry, dict) and entry.get("id") != "venice":
                providers_list.append(entry)
    elif isinstance(current, dict):
        for key, entry in current.items():
            if (
                isinstance(entry, dict)
                and entry.get("id") != "venice"
                and key != "venice"
            ):
                providers_list.append(entry)
    return providers_list
def sync_crush_models(
    venice_config: dict,
    crush_path: str,
    style: str | None = "auto",
) -> bool:
    """Synchronize the Venice provider block in crush.json with venice.json.

    Returns True when the crush file was written, False when it does not
    exist (in which case the sync is skipped with a notice).
    """
    if not Path(crush_path).exists():
        print(f"Skipping Crush sync; {crush_path} not found.")
        return False

    crush = load_json(crush_path)

    # Deep-copy the model list via a JSON round-trip so later mutation of
    # venice_config cannot leak into the crush document.
    venice_block = {
        "id": "venice",
        "name": "Venice AI",
        "type": "openai-compat",
        "base_url": venice_config.get("api_endpoint", ""),
        "api_key": venice_config.get("api_key", ""),
        "models": json.loads(json.dumps(venice_config["models"])),
    }

    existing = crush.get("providers")
    chosen = style or "auto"
    if chosen == "auto":
        # Preserve whatever shape the crush file already uses.
        chosen = "map" if isinstance(existing, dict) else "list"

    if chosen == "map":
        as_map = migrate_providers_map(existing)
        as_map["venice"] = venice_block
        crush["providers"] = as_map
    else:
        as_list = migrate_providers_list(existing)
        as_list.append(venice_block)
        crush["providers"] = as_list

    save_json(crush_path, crush)
    return True
def infer_model_entry(api_model: dict) -> dict:
    """Build a venice.json model entry from a raw API model object."""
    spec = api_model["model_spec"]
    capabilities = spec.get("capabilities", {})
    reasoning = capabilities.get("supportsReasoning", False)
    vision = (
        capabilities.get("supportsVision", False)
        or capabilities.get("supportsMultipleImages", False)
    )

    entry = {
        "id": api_model["id"],
        "name": spec.get("name") or api_model["id"],
        "cost_per_1m_in": spec["pricing"]["input"]["usd"],
        "cost_per_1m_out": spec["pricing"]["output"]["usd"],
        "cost_per_1m_in_cached": 0,
        "cost_per_1m_out_cached": 0,
        "context_window": spec["availableContextTokens"],
        # Cap the default completion budget at 32768 even when the model
        # advertises a larger maximum.
        "default_max_tokens": min(spec.get("maxCompletionTokens", 4096), 32768),
        "can_reason": reasoning,
    }
    if reasoning:
        entry["reasoning_levels"] = ["low", "medium", "high"]
        entry["default_reasoning_effort"] = "medium"
    entry["supports_attachments"] = vision
    entry["options"] = {}
    return entry
def main() -> None:
    """CLI entry point: diff venice.json against the API dump, report, sync.

    Flow: parse args → load both JSON files → update existing config models
    in place (context_window / pricing only) → append brand-new API models →
    print a report → write venice.json (unless --dry-run) → sync crush.json.
    """
    parser = argparse.ArgumentParser(
        description="Sync venice.json config with Venice AI API model data."
    )
    parser.add_argument("--api-file", default="venice-models.json")
    parser.add_argument("--config-file", default="venice.json")
    parser.add_argument("--crush-style", choices=["auto", "map", "list"],
                        default="auto",
                        help="Force the `providers` block to a map or list when syncing crush.json")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would change without writing")
    parser.add_argument("--include-e2ee", action="store_true",
                        help="Include e2ee-* encrypted models (excluded by default)")
    parser.add_argument("--remove-missing", action="store_true",
                        help="Remove config models not present in the API")
    parser.add_argument("--crush-file", default="crush.json",
                        help="Path to crush override to sync with venice.json")
    args = parser.parse_args()

    api_data = load_json(args.api_file)
    config = load_json(args.config_file)

    # Index API text models, optionally filtering out e2ee variants
    api_models: dict[str, dict] = {
        m["id"]: m
        for m in api_data.get("data", [])
        if m.get("type") == "text"
        and (args.include_e2ee or not m["id"].startswith("e2ee-"))
    }
    # IDs already present in the config, used in pass 2 to detect new models.
    config_ids: set[str] = {m["id"] for m in config["models"]}

    added: list[str] = []
    updated: list[tuple[str, dict]] = []
    # NOTE: `removed` collects every config id absent from the API, even when
    # --remove-missing is off and the entry is kept; the report phrases it
    # accordingly.
    removed: list[str] = []

    # Pass 1: update existing models, detect removals
    new_model_list = []
    for model in config["models"]:
        mid = model["id"]
        if mid not in api_models:
            removed.append(mid)
            if not args.remove_missing:
                new_model_list.append(model)  # keep it unless explicitly removing
            continue
        spec = api_models[mid]["model_spec"]
        # field -> (old, new), collected for the report below.
        changes: dict[str, tuple] = {}
        new_ctx = spec["availableContextTokens"]
        new_in = spec["pricing"]["input"]["usd"]
        new_out = spec["pricing"]["output"]["usd"]
        if model["context_window"] != new_ctx:
            changes["context_window"] = (model["context_window"], new_ctx)
            model["context_window"] = new_ctx
        # Prices are floats; compare with a small tolerance to avoid spurious
        # diffs from representation noise.
        if abs(model["cost_per_1m_in"] - new_in) > 0.0001:
            changes["cost_per_1m_in"] = (model["cost_per_1m_in"], new_in)
            model["cost_per_1m_in"] = new_in
        if abs(model["cost_per_1m_out"] - new_out) > 0.0001:
            changes["cost_per_1m_out"] = (model["cost_per_1m_out"], new_out)
            model["cost_per_1m_out"] = new_out
        if changes:
            updated.append((mid, changes))
        new_model_list.append(model)

    # Pass 2: append brand-new models from API
    for mid, api_model in api_models.items():
        if mid not in config_ids:
            entry = infer_model_entry(api_model)
            new_model_list.append(entry)
            added.append(mid)
    config["models"] = new_model_list
    # NOTE(review): a kept-but-missing model alone makes this truthy, so the
    # config gets rewritten with identical content in that case — confirm
    # whether that is intended.
    any_changes = bool(added or updated or removed)

    # ── Report ────────────────────────────────────────────────────────────────
    if removed:
        disposition = (
            "REMOVED" if args.remove_missing
            else "NOT IN API (kept — use --remove-missing to drop)"
        )
        print(f"\n{disposition}:")
        for mid in removed:
            print(f" - {mid}")
    if updated:
        print("\nUPDATED (pricing / context_window):")
        for mid, changes in updated:
            print(f" {mid}:")
            for field, (old, new) in changes.items():
                print(f" {field}: {old} → {new}")
    if added:
        print("\nADDED (new models from API):")
        for mid in added:
            # Look the entry back up so the report can show its display name.
            m = next(m for m in new_model_list if m["id"] == mid)
            print(f" + {mid} ({m['name']})")

    # An all-whitespace --crush-file disables the crush sync entirely.
    crush_file = args.crush_file.strip() if args.crush_file else None
    wrote_config = False  # NOTE(review): set but never read — candidate for removal.
    if not any_changes:
        print("No changes detected.")
    elif args.dry_run:
        print(f"\n[dry-run] {len(new_model_list)} models total — nothing written.")
    else:
        save_json(args.config_file, config)
        wrote_config = True
        print(f"\nWrote {args.config_file} ({len(new_model_list)} models total)")
    # The crush sync runs regardless of whether venice.json changed, so the
    # two files stay consistent even after a no-op model diff.
    if crush_file:
        if args.dry_run:
            print(f"[dry-run] would sync {crush_file} with {len(new_model_list)} models")
        else:
            if sync_crush_models(config, crush_file, args.crush_style):
                print(f"Wrote {crush_file}")
# Entry-point guard: run the sync only when executed as a script, not on import.
if __name__ == "__main__":
    main()
{
"data": [
{
"created": 1742262554,
"id": "venice-uncensored",
"model_spec": {
"pricing": {
"input": {
"usd": 0.2,
"diem": 0.2
},
"output": {
"usd": 0.9,
"diem": 0.9
}
},
"availableContextTokens": 32000,
"maxCompletionTokens": 8192,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp16",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Designed for maximum creative freedom and authentic interaction. Ideal for open-ended exploration, roleplay, and unfiltered dialogue. Features minimal content restrictions.",
"name": "Venice Uncensored 1.1",
"modelSource": "https://huggingface.co/cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition",
"offline": false,
"privacy": "private",
"traits": [
"most_uncensored"
]
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1771545600,
"id": "venice-uncensored-role-play",
"model_spec": {
"pricing": {
"input": {
"usd": 0.5,
"diem": 0.5
},
"output": {
"usd": 2,
"diem": 2
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 4096,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Optimized for creative roleplay scenarios with maximum freedom. Designed for immersive storytelling, character interactions, and open-ended creative writing.",
"name": "Venice Role Play Uncensored",
"modelSource": "https://huggingface.co/dphnAI/24B-3.2-RP-K2-final",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1711929600,
"id": "zai-org-glm-4.6",
"model_spec": {
"pricing": {
"input": {
"usd": 0.85,
"diem": 0.85
},
"cache_input": {
"usd": 0.3,
"diem": 0.3
},
"output": {
"usd": 2.75,
"diem": 2.75
}
},
"model_sets": [
"venice_recommendations"
],
"availableContextTokens": 198000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp4",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GLM-4.6 is a large language model developed by Zhiyuan AI, featuring strong reasoning capabilities and support for multiple languages. Supports the largest context window for processing extensive text and detailed analysis.",
"name": "GLM 4.6",
"modelSource": "https://huggingface.co/zai-org/GLM-4.6",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1770163200,
"id": "olafangensan-glm-4.7-flash-heretic",
"model_spec": {
"pricing": {
"input": {
"usd": 0.14,
"diem": 0.14
},
"output": {
"usd": 0.8,
"diem": 0.8
}
},
"model_sets": [
"venice_recommendations"
],
"availableContextTokens": 200000,
"maxCompletionTokens": 24000,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GLM-4.7-Flash-Heretic is an uncensored experimental variant of GLM-4.7-Flash, optimized for creative freedom and unfiltered dialogue with fast inference speed.",
"name": "GLM 4.7 Flash Heretic",
"modelSource": "https://huggingface.co/Olafangensan/GLM-4.7-Flash-heretic",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1769644800,
"id": "zai-org-glm-4.7-flash",
"model_spec": {
"pricing": {
"input": {
"usd": 0.125,
"diem": 0.125
},
"output": {
"usd": 0.5,
"diem": 0.5
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GLM-4.7-Flash is a fast inference variant of GLM-4.7, optimized for speed while maintaining strong reasoning capabilities. Ideal for applications requiring quick responses with good quality.",
"name": "GLM 4.7 Flash",
"modelSource": "https://huggingface.co/zai-org/GLM-4.7-Flash",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1770768000,
"id": "zai-org-glm-5",
"model_spec": {
"pricing": {
"input": {
"usd": 1,
"diem": 1
},
"cache_input": {
"usd": 0.2,
"diem": 0.2
},
"output": {
"usd": 3.2,
"diem": 3.2
}
},
"model_sets": [
"venice_recommendations"
],
"availableContextTokens": 198000,
"maxCompletionTokens": 32000,
"capabilities": {
"optimizedForCode": true,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GLM-5 is the next-generation large language model developed by Zhiyuan AI, featuring significantly enhanced reasoning capabilities, improved instruction following, and support for multiple languages. Supports large context windows for processing extensive text and detailed analysis.",
"name": "GLM 5",
"modelSource": "https://huggingface.co/zai-org/GLM-5",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1766534400,
"id": "zai-org-glm-4.7",
"model_spec": {
"pricing": {
"input": {
"usd": 0.55,
"diem": 0.55
},
"cache_input": {
"usd": 0.11,
"diem": 0.11
},
"output": {
"usd": 2.65,
"diem": 2.65
}
},
"availableContextTokens": 198000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp4",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GLM-4.7 is a large language model developed by Zhiyuan AI, featuring strong reasoning capabilities and support for multiple languages. Supports the largest context window for processing extensive text and detailed analysis.",
"name": "GLM 4.7",
"modelSource": "https://huggingface.co/zai-org/GLM-4.7",
"offline": false,
"privacy": "private",
"traits": [
"default",
"most_intelligent",
"function_calling_default"
]
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1772668800,
"id": "qwen3-5-9b",
"model_spec": {
"pricing": {
"input": {
"usd": 0.05,
"diem": 0.05
},
"output": {
"usd": 0.15,
"diem": 0.15
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 65536,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": true,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "A 9B dense model with 262K native context window (extendable to 1M). Features Gated DeltaNet hybrid attention architecture for efficient long-context processing. Supports 201 languages, thinking/reasoning mode, and function calling.",
"name": "Qwen 3.5 9B",
"modelSource": "https://huggingface.co/Qwen/Qwen3.5-9B",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1768435200,
"id": "mistral-small-3-2-24b-instruct",
"model_spec": {
"pricing": {
"input": {
"usd": 0.09375,
"diem": 0.09375
},
"output": {
"usd": 0.25,
"diem": 0.25
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Mistral Small 3.2 is a 24B parameter model optimized for efficiency and performance. Ideal for general-purpose tasks with balanced speed and capability.",
"name": "Mistral Small 3.2 24B Instruct",
"modelSource": "https://huggingface.co/mistralai/Mistral-Small-3.2-24B-Instruct-2506",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1745903059,
"id": "qwen3-235b-a22b-thinking-2507",
"model_spec": {
"pricing": {
"input": {
"usd": 0.45,
"diem": 0.45
},
"output": {
"usd": 3.5,
"diem": 3.5
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Built for in-depth research and handling long, complex documents. Ideal for technical work, multimodal input, and high-precision tasks.",
"name": "Qwen 3 235B A22B Thinking 2507",
"modelSource": "https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507-FP8",
"offline": false,
"privacy": "private",
"traits": [
"default_reasoning"
]
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1745903059,
"id": "qwen3-235b-a22b-instruct-2507",
"model_spec": {
"pricing": {
"input": {
"usd": 0.15,
"diem": 0.15
},
"output": {
"usd": 0.75,
"diem": 0.75
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Built for in-depth research and handling long, complex documents. Ideal for technical work, multimodal input, and high-precision tasks.",
"name": "Qwen 3 235B A22B Instruct 2507",
"modelSource": "https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1745903059,
"id": "qwen3-next-80b",
"model_spec": {
"pricing": {
"input": {
"usd": 0.35,
"diem": 0.35
},
"output": {
"usd": 1.9,
"diem": 1.9
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp16",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Optimized for speed and efficiency.",
"name": "Qwen 3 Next 80b",
"modelSource": "https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1745903059,
"id": "qwen3-coder-480b-a35b-instruct",
"model_spec": {
"pricing": {
"input": {
"usd": 0.75,
"diem": 0.75
},
"output": {
"usd": 3,
"diem": 3
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 65536,
"capabilities": {
"optimizedForCode": true,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Optimized for code.",
"name": "Qwen 3 Coder 480b",
"modelSource": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
"offline": false,
"privacy": "private",
"traits": [
"default_code"
]
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1758758400,
"id": "hermes-3-llama-3.1-405b",
"model_spec": {
"pricing": {
"input": {
"usd": 1.1,
"diem": 1.1
},
"output": {
"usd": 3,
"diem": 3
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Hermes 3 405B is a frontier level, full parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.",
"name": "Hermes 3 Llama 3.1 405b",
"modelSource": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1762214400,
"id": "google-gemma-3-27b-it",
"model_spec": {
"pricing": {
"input": {
"usd": 0.12,
"diem": 0.12
},
"output": {
"usd": 0.2,
"diem": 0.2
}
},
"availableContextTokens": 198000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to Gemma 2.",
"name": "Google Gemma 3 27B Instruct",
"modelSource": "https://huggingface.co/google/gemma-3-27b-it",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1764547200,
"id": "grok-41-fast",
"model_spec": {
"pricing": {
"input": {
"usd": 0.25,
"diem": 0.25
},
"cache_input": {
"usd": 0.0625,
"diem": 0.0625
},
"output": {
"usd": 0.625,
"diem": 0.625
}
},
"model_sets": [
"venice_recommendations"
],
"availableContextTokens": 1000000,
"maxCompletionTokens": 30000,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Grok 4.1 Fast is xAI's best agentic tool-calling model that shines in real-world use cases like customer support and image analysis.",
"name": "Grok 4.1 Fast",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773273600,
"id": "grok-4-20-beta",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 2.5,
"diem": 2.5
},
"cache_input": {
"usd": 0.25,
"diem": 0.25
},
"output": {
"usd": 7.5,
"diem": 7.5
},
"extended": {
"context_token_threshold": 200000,
"input": {
"usd": 5,
"diem": 5
},
"output": {
"usd": 15,
"diem": 15
},
"cache_input": {
"usd": 0.25,
"diem": 0.25
}
}
},
"availableContextTokens": 2000000,
"maxCompletionTokens": 128000,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": true
},
"description": "Grok 4.20 Beta is xAI's latest multimodal reasoning model with strong tool use, structured output support, and a 2M-token context window.",
"name": "Grok 4.20 Beta",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773273600,
"id": "grok-4-20-multi-agent-beta",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 2.5,
"diem": 2.5
},
"cache_input": {
"usd": 0.25,
"diem": 0.25
},
"output": {
"usd": 7.5,
"diem": 7.5
},
"extended": {
"context_token_threshold": 200000,
"input": {
"usd": 5,
"diem": 5
},
"output": {
"usd": 15,
"diem": 15
},
"cache_input": {
"usd": 0.25,
"diem": 0.25
}
}
},
"availableContextTokens": 2000000,
"maxCompletionTokens": 128000,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": true
},
"description": "Grok 4.20 Multi-Agent Beta is a variant of xAI Grok 4.20 designed for collaborative, agent-based workflows. Multiple agents operate in parallel to conduct deep research, coordinate tool use, and synthesize information across complex tasks.",
"name": "Grok 4.20 Multi-Agent Beta",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1771459200,
"id": "gemini-3-1-pro-preview",
"model_spec": {
"pricing": {
"input": {
"usd": 2.5,
"diem": 2.5
},
"cache_input": {
"usd": 0.5,
"diem": 0.5
},
"cache_write": {
"usd": 0.5,
"diem": 0.5
},
"output": {
"usd": 15,
"diem": 15
},
"extended": {
"context_token_threshold": 200000,
"input": {
"usd": 5,
"diem": 5
},
"output": {
"usd": 22.5,
"diem": 22.5
},
"cache_input": {
"usd": 0.5,
"diem": 0.5
},
"cache_write": {
"usd": 0.5,
"diem": 0.5
}
}
},
"availableContextTokens": 1000000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": true,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 20,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": true,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Gemini 3.1 Pro is the latest evolution of Google flagship frontier model with 1M context, advancing high-precision multimodal reasoning across text, image, and code.",
"name": "Gemini 3.1 Pro Preview",
"modelSource": "https://deepmind.google/models/gemini/pro/",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1766102400,
"id": "gemini-3-flash-preview",
"model_spec": {
"pricing": {
"input": {
"usd": 0.7,
"diem": 0.7
},
"cache_input": {
"usd": 0.07,
"diem": 0.07
},
"output": {
"usd": 3.75,
"diem": 3.75
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 65536,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": true,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": true,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi-turn chat, and coding assistance. It delivers near Pro level reasoning with substantially lower latency.",
"name": "Gemini 3 Flash Preview",
"modelSource": "https://deepmind.google/models/gemini/flash/",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1770249600,
"id": "claude-opus-4-6",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 6,
"diem": 6
},
"cache_input": {
"usd": 0.6,
"diem": 0.6
},
"cache_write": {
"usd": 7.5,
"diem": 7.5
},
"output": {
"usd": 30,
"diem": 30
}
},
"model_sets": [
"venice_recommendations"
],
"availableContextTokens": 1000000,
"maxCompletionTokens": 128000,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Claude Opus 4.6 is Anthropic's most capable reasoning model, building on Opus 4.5 with enhanced performance across complex software engineering, agentic workflows, and long-horizon tasks. It features a 1M token context window, improved multimodal capabilities, and stronger robustness to prompt injection.",
"name": "Claude Opus 4.6",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1764979200,
"id": "claude-opus-4-5",
"model_spec": {
"pricing": {
"input": {
"usd": 6,
"diem": 6
},
"cache_input": {
"usd": 0.6,
"diem": 0.6
},
"cache_write": {
"usd": 7.5,
"diem": 7.5
},
"output": {
"usd": 30,
"diem": 30
}
},
"availableContextTokens": 198000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Claude Opus 4.5 is Anthropic's frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection.",
"name": "Claude Opus 4.5",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1771286400,
"id": "claude-sonnet-4-6",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 3.6,
"diem": 3.6
},
"cache_input": {
"usd": 0.36,
"diem": 0.36
},
"cache_write": {
"usd": 4.5,
"diem": 4.5
},
"output": {
"usd": 18,
"diem": 18
}
},
"availableContextTokens": 1000000,
"maxCompletionTokens": 64000,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Claude Sonnet 4.6 is Anthropic's best combination of speed and intelligence, offering strong performance on coding, reasoning, and general tasks with excellent speed and cost efficiency. It features a 1M token context window and 64K max output tokens.",
"name": "Claude Sonnet 4.6",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1736899200,
"id": "claude-sonnet-4-5",
"model_spec": {
"pricing": {
"input": {
"usd": 3.75,
"diem": 3.75
},
"cache_input": {
"usd": 0.375,
"diem": 0.375
},
"cache_write": {
"usd": 4.69,
"diem": 4.69
},
"output": {
"usd": 18.75,
"diem": 18.75
}
},
"availableContextTokens": 198000,
"maxCompletionTokens": 64000,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Claude Sonnet 4.5 is Anthropic's balanced model offering strong performance on coding, reasoning, and general tasks with good speed and cost efficiency.",
"name": "Claude Sonnet 4.5",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1762387200,
"id": "openai-gpt-oss-120b",
"model_spec": {
"pricing": {
"input": {
"usd": 0.07,
"diem": 0.07
},
"output": {
"usd": 0.3,
"diem": 0.3
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation",
"name": "OpenAI GPT OSS 120B",
"modelSource": "https://huggingface.co/openai/gpt-oss-120b",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1765324800,
"id": "kimi-k2-thinking",
"model_spec": {
"pricing": {
"input": {
"usd": 0.75,
"diem": 0.75
},
"cache_input": {
"usd": 0.375,
"diem": 0.375
},
"output": {
"usd": 3.2,
"diem": 3.2
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 65536,
"capabilities": {
"optimizedForCode": true,
"quantization": "int4",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Kimi K2 Thinking is Moonshot AIs most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows.",
"name": "Kimi K2 Thinking",
"modelSource": "https://huggingface.co/moonshotai/Kimi-K2-Thinking",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1769548800,
"id": "kimi-k2-5",
"model_spec": {
"pricing": {
"input": {
"usd": 0.56,
"diem": 0.56
},
"cache_input": {
"usd": 0.11,
"diem": 0.11
},
"output": {
"usd": 3.5,
"diem": 3.5
}
},
"model_sets": [
"venice_recommendations"
],
"availableContextTokens": 256000,
"maxCompletionTokens": 65536,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Kimi K2.5 is Moonshot AIs most advanced open reasoning model, featuring trillion-parameter Mixture-of-Experts architecture with 32B active parameters and 256K context windows.",
"name": "Kimi K2.5",
"modelSource": "",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1764806400,
"id": "deepseek-v3.2",
"model_spec": {
"pricing": {
"input": {
"usd": 0.33,
"diem": 0.33
},
"cache_input": {
"usd": 0.16,
"diem": 0.16
},
"output": {
"usd": 0.48,
"diem": 0.48
}
},
"model_sets": [
"venice_recommendations"
],
"availableContextTokens": 160000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "DeepSeek-V3.2 is an efficient large language model with DeepSeek Sparse Attention (DSA) for long contexts. It features strong reasoning and tool-use skills, achieving top results on the 2025 IMO and IOI.",
"name": "DeepSeek V3.2",
"modelSource": "https://huggingface.co/deepseek-ai/DeepSeek-V3.2",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1774310400,
"id": "aion-labs.aion-2-0",
"model_spec": {
"pricing": {
"input": {
"usd": 1,
"diem": 1
},
"cache_input": {
"usd": 0.25,
"diem": 0.25
},
"output": {
"usd": 2,
"diem": 2
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": false,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Aion 2.0 is a DeepSeek V3.2-based model fine-tuned for immersive roleplaying and long-form storytelling. It excels at introducing tension, crises, and meaningful conflict into narratives, keeping stories unpredictable and deeply engaging. The model handles mature and darker themes with exceptional nuance, avoiding sanitized or surface-level treatment. Available as an early alpha—your feedback helps shape its development.",
"name": "Aion 2.0",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1727966436,
"id": "llama-3.2-3b",
"model_spec": {
"pricing": {
"input": {
"usd": 0.15,
"diem": 0.15
},
"output": {
"usd": 0.6,
"diem": 0.6
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 4096,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp16",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"name": "Llama 3.2 3B",
"modelSource": "https://huggingface.co/meta-llama/Llama-3.2-3B",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1743897600,
"id": "llama-3.3-70b",
"model_spec": {
"pricing": {
"input": {
"usd": 0.7,
"diem": 0.7
},
"output": {
"usd": 2.8,
"diem": 2.8
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 4096,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"name": "Llama 3.3 70B",
"modelSource": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1765584000,
"id": "openai-gpt-52",
"model_spec": {
"pricing": {
"input": {
"usd": 2.19,
"diem": 2.19
},
"cache_input": {
"usd": 0.219,
"diem": 0.219
},
"output": {
"usd": 17.5,
"diem": 17.5
}
},
"model_sets": [
"venice_recommendations"
],
"availableContextTokens": 256000,
"maxCompletionTokens": 65536,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context performance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.",
"name": "GPT-5.2",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1736899200,
"id": "openai-gpt-52-codex",
"model_spec": {
"pricing": {
"input": {
"usd": 2.19,
"diem": 2.19
},
"cache_input": {
"usd": 0.219,
"diem": 0.219
},
"output": {
"usd": 17.5,
"diem": 17.5
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 65536,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GPT-5.2 Codex is OpenAI specialized coding model built on GPT-5.2, optimized for advanced software development, code generation, and technical problem-solving.",
"name": "GPT-5.2 Codex",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1771891200,
"id": "openai-gpt-53-codex",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 2.19,
"diem": 2.19
},
"cache_input": {
"usd": 0.219,
"diem": 0.219
},
"output": {
"usd": 17.5,
"diem": 17.5
}
},
"availableContextTokens": 400000,
"maxCompletionTokens": 128000,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GPT-5.3 Codex is OpenAI specialized coding model built on GPT-5.3, optimized for advanced software development, code generation, and technical problem-solving.",
"name": "GPT-5.3 Codex",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1772668800,
"id": "openai-gpt-54",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 3.13,
"diem": 3.13
},
"cache_input": {
"usd": 0.313,
"diem": 0.313
},
"output": {
"usd": 18.8,
"diem": 18.8
}
},
"availableContextTokens": 1000000,
"maxCompletionTokens": 131072,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GPT-5.4 is the latest frontier model in the GPT-5 series with a 1M+ context window, offering improved agentic and long context performance. It uses adaptive reasoning to dynamically allocate computation across tasks.",
"name": "GPT-5.4",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1772668800,
"id": "openai-gpt-54-pro",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 37.5,
"diem": 37.5
},
"output": {
"usd": 225,
"diem": 225
},
"extended": {
"context_token_threshold": 272000,
"input": {
"usd": 75,
"diem": 75
},
"output": {
"usd": 337.5,
"diem": 337.5
}
}
},
"availableContextTokens": 1000000,
"maxCompletionTokens": 128000,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning for complex, high-stakes tasks. It provides a 1M+ token context window (922K input, 128K output) and supports text and image inputs.",
"name": "GPT-5.4 Pro",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1774569600,
"id": "openai-gpt-54-mini",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.9375,
"diem": 0.9375
},
"cache_input": {
"usd": 0.09375,
"diem": 0.09375
},
"output": {
"usd": 5.625,
"diem": 5.625
}
},
"availableContextTokens": 400000,
"maxCompletionTokens": 128000,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GPT-5.4 Mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding, and tool use.",
"name": "GPT-5.4 Mini",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1772236800,
"id": "openai-gpt-4o-2024-11-20",
"model_spec": {
"pricing": {
"input": {
"usd": 3.125,
"diem": 3.125
},
"output": {
"usd": 12.5,
"diem": 12.5
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": true,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "OpenAI's multimodal flagship model with vision capabilities, strong reasoning, and broad knowledge. Popular for its balanced performance across tasks. Version: 2024-11-20.",
"name": "GPT-4o",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1772236800,
"id": "openai-gpt-4o-mini-2024-07-18",
"model_spec": {
"pricing": {
"input": {
"usd": 0.1875,
"diem": 0.1875
},
"cache_input": {
"usd": 0.09375,
"diem": 0.09375
},
"output": {
"usd": 0.75,
"diem": 0.75
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": true,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "OpenAI's cost-efficient small model that delivers GPT-4 level intelligence at a fraction of the cost. Ideal for high-volume applications requiring strong reasoning. Version: 2024-07-18.",
"name": "GPT-4o Mini",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1764547200,
"id": "minimax-m21",
"model_spec": {
"pricing": {
"input": {
"usd": 0.35,
"diem": 0.35
},
"cache_input": {
"usd": 0.04,
"diem": 0.04
},
"output": {
"usd": 1.5,
"diem": 1.5
}
},
"availableContextTokens": 198000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": true,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development.",
"name": "MiniMax M2.1",
"modelSource": "https://huggingface.co/MiniMaxAI/MiniMax-M2.1",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1770854400,
"id": "minimax-m25",
"model_spec": {
"pricing": {
"input": {
"usd": 0.34,
"diem": 0.34
},
"cache_input": {
"usd": 0.04,
"diem": 0.04
},
"output": {
"usd": 1.19,
"diem": 1.19
}
},
"availableContextTokens": 198000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": false,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "MiniMax-M2.5 is a state-of-the-art large language model optimized for coding, agentic workflows, and modern application development with enhanced reasoning capabilities.",
"name": "MiniMax M2.5",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "minimax-m27",
"model_spec": {
"pricing": {
"input": {
"usd": 0.375,
"diem": 0.375
},
"cache_input": {
"usd": 0.075,
"diem": 0.075
},
"output": {
"usd": 1.5,
"diem": 1.5
}
},
"availableContextTokens": 198000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": false,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity with advanced agentic capabilities through multi-agent collaboration.",
"name": "MiniMax M2.7",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1764547200,
"id": "grok-code-fast-1",
"model_spec": {
"pricing": {
"input": {
"usd": 0.25,
"diem": 0.25
},
"cache_input": {
"usd": 0.03,
"diem": 0.03
},
"output": {
"usd": 1.87,
"diem": 1.87
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 10000,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": true,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": true,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding",
"name": "Grok Code Fast 1",
"modelSource": "",
"offline": false,
"privacy": "anonymized",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1771977600,
"id": "qwen3-5-35b-a3b",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.3125,
"diem": 0.3125
},
"cache_input": {
"usd": 0.15625,
"diem": 0.15625
},
"output": {
"usd": 1.25,
"diem": 1.25
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 65536,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": true,
"supportsMultipleImages": true,
"maxImages": 5,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": true,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"constraints": {
"temperature": {
"default": 1
},
"top_p": {
"default": 0.95
},
"repetition_penalty": {
"default": 1
}
},
"description": "Qwen 3.5 35B A3B is a highly efficient MoE model with 35B total parameters and only 3B active parameters. It surpasses the larger Qwen3-235B-A22B while being 6.7x smaller, excelling at reasoning, coding, and general knowledge tasks.",
"name": "Qwen 3.5 35B A3B",
"modelSource": "",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1768521600,
"id": "qwen3-vl-235b-a22b",
"model_spec": {
"pricing": {
"input": {
"usd": 0.25,
"diem": 0.25
},
"output": {
"usd": 1.5,
"diem": 1.5
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": true,
"maxImages": 10,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Qwen3-VL 235B vision-language model with MoE architecture. The most powerful VL model in the Qwen series with superior visual perception, OCR, and multimodal reasoning.",
"name": "Qwen3 VL 235B",
"modelSource": "https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Instruct",
"offline": false,
"privacy": "private",
"traits": [
"default_vision"
]
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1769472000,
"id": "qwen3-coder-480b-a35b-instruct-turbo",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.35,
"diem": 0.35
},
"cache_input": {
"usd": 0.04,
"diem": 0.04
},
"output": {
"usd": 1.5,
"diem": 1.5
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 65536,
"capabilities": {
"optimizedForCode": true,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": true,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Turbo variant of Qwen3 Coder 480B, optimized for faster inference on code tasks.",
"name": "Qwen 3 Coder 480B Turbo",
"modelSource": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1769472000,
"id": "nvidia-nemotron-3-nano-30b-a3b",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.075,
"diem": 0.075
},
"output": {
"usd": 0.3,
"diem": 0.3
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 16384,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": true,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": true,
"supportsTeeAttestation": false,
"supportsE2EE": false,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "NVIDIA Nemotron 3 Nano 30B is a compact and efficient language model from NVIDIA, optimized for fast inference while maintaining strong performance across diverse tasks.",
"name": "NVIDIA Nemotron 3 Nano 30B",
"modelSource": "https://huggingface.co/nvidia/Nemotron-3-Nano-30B-A3B",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-venice-uncensored-24b-p",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.25,
"diem": 0.25
},
"output": {
"usd": 1.15,
"diem": 1.15
}
},
"availableContextTokens": 32000,
"maxCompletionTokens": 4096,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Venice Uncensored 1.1 running in a Trusted Execution Environment (TEE). Hardware attestation evidence is available for independent verification of enclave identity and configuration.",
"name": "Venice Uncensored 1.1",
"modelSource": "https://huggingface.co/cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-gemma-3-27b-p",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.14,
"diem": 0.14
},
"output": {
"usd": 0.5,
"diem": 0.5
}
},
"availableContextTokens": 40000,
"maxCompletionTokens": 4096,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Gemma 3 27B running in a Trusted Execution Environment (TEE). Google's multimodal model supporting vision-language input with 140+ language understanding, with hardware attestation evidence available for independent verification.",
"name": "Gemma 3 27B",
"modelSource": "https://huggingface.co/google/gemma-3-27b-it",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-glm-4-7-p",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 1.1,
"diem": 1.1
},
"output": {
"usd": 4.15,
"diem": 4.15
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GLM 4.7 running in a Trusted Execution Environment (TEE). Z.AI's flagship model with enhanced programming capabilities and stable multi-step reasoning, with hardware attestation evidence available for independent verification.",
"name": "GLM 4.7",
"modelSource": "",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-glm-4-7-flash-p",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.13,
"diem": 0.13
},
"output": {
"usd": 0.55,
"diem": 0.55
}
},
"availableContextTokens": 198000,
"maxCompletionTokens": 4096,
"capabilities": {
"optimizedForCode": true,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GLM 4.7 Flash running in a Trusted Execution Environment (TEE). A 30B-class model optimized for agentic coding with strong long-horizon task planning, with hardware attestation evidence available for independent verification.",
"name": "GLM 4.7 Flash",
"modelSource": "",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-gpt-oss-20b-p",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.05,
"diem": 0.05
},
"output": {
"usd": 0.19,
"diem": 0.19
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GPT OSS 20B running in a Trusted Execution Environment (TEE). OpenAI's compact open-weight 21B MoE model with 3.6B active parameters, optimized for lower-latency inference, with hardware attestation evidence available for independent verification.",
"name": "GPT OSS 20B",
"modelSource": "https://huggingface.co/openai/gpt-oss-20b",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-gpt-oss-120b-p",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.13,
"diem": 0.13
},
"output": {
"usd": 0.65,
"diem": 0.65
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GPT OSS 120B running in a Trusted Execution Environment (TEE). OpenAI's open-weight 117B-parameter MoE model with configurable reasoning depth and native tool use, with hardware attestation evidence available for independent verification.",
"name": "GPT OSS 120B",
"modelSource": "https://huggingface.co/openai/gpt-oss-120b",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-qwen-2-5-7b-p",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.05,
"diem": 0.05
},
"output": {
"usd": 0.13,
"diem": 0.13
}
},
"availableContextTokens": 32000,
"maxCompletionTokens": 4096,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Qwen 2.5 7B Instruct running in a Trusted Execution Environment (TEE). A compact model with strong coding, math, and multilingual capabilities supporting 29+ languages, with hardware attestation evidence available for independent verification.",
"name": "Qwen 2.5 7B",
"modelSource": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-qwen3-30b-a3b-p",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.19,
"diem": 0.19
},
"output": {
"usd": 0.69,
"diem": 0.69
}
},
"availableContextTokens": 256000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Qwen3 30B A3B running in a Trusted Execution Environment (TEE). A MoE model with 30.5B total parameters and 3.3B activated per inference, supporting ultra-long 256K context, with hardware attestation evidence available for independent verification.",
"name": "Qwen3 30B A3B",
"modelSource": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-qwen3-vl-30b-a3b-p",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.25,
"diem": 0.25
},
"output": {
"usd": 0.9,
"diem": 0.9
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 4096,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": false,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": true,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Qwen3 VL 30B A3B running in a Trusted Execution Environment (TEE). A multimodal model unifying text generation with visual understanding for images and videos, with hardware attestation evidence available for independent verification.",
"name": "Qwen3 VL 30B A3B",
"modelSource": "https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-glm-5",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 1.1,
"diem": 1.1
},
"output": {
"usd": 4.15,
"diem": 4.15
}
},
"availableContextTokens": 198000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": false,
"quantization": "fp8",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "GLM 5 running in a Trusted Execution Environment (TEE). Hardware attestation evidence is available for independent verification of enclave identity and configuration.",
"name": "GLM 5",
"modelSource": "https://huggingface.co/zai-org/GLM-5",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
},
{
"created": 1773792000,
"id": "e2ee-qwen3-5-122b-a10b",
"model_spec": {
"betaModel": true,
"pricing": {
"input": {
"usd": 0.5,
"diem": 0.5
},
"output": {
"usd": 4,
"diem": 4
}
},
"availableContextTokens": 128000,
"maxCompletionTokens": 32768,
"capabilities": {
"optimizedForCode": false,
"quantization": "not-available",
"supportsAudioInput": false,
"supportsFunctionCalling": false,
"supportsLogProbs": false,
"supportsMultipleImages": false,
"supportsReasoning": true,
"supportsReasoningEffort": false,
"supportsResponseSchema": false,
"supportsTeeAttestation": true,
"supportsE2EE": true,
"supportsVideoInput": false,
"supportsVision": false,
"supportsWebSearch": true,
"supportsXSearch": false
},
"description": "Qwen3.5 122B A10B running in a Trusted Execution Environment (TEE). Hardware attestation evidence is available for independent verification of enclave identity and configuration.",
"name": "Qwen3.5 122B A10B",
"modelSource": "https://huggingface.co/Qwen/Qwen3.5-122B-A10B",
"offline": false,
"privacy": "private",
"traits": []
},
"object": "model",
"owned_by": "venice.ai",
"type": "text"
}
],
"object": "list",
"type": "text"
}
{
"name": "Venice AI",
"id": "venice",
"api_key": "$VENICE_API_KEY",
"api_endpoint": "https://api.venice.ai/api/v1",
"type": "openai-compat",
"default_large_model_id": "claude-opus-4-6",
"default_small_model_id": "minimax-m25",
"models": [
{
"id": "claude-opus-4-5",
"name": "Claude Opus 4.5",
"cost_per_1m_in": 6,
"cost_per_1m_out": 30,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 198000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "claude-opus-4-6",
"name": "Claude Opus 4.6",
"cost_per_1m_in": 6,
"cost_per_1m_out": 30,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 1000000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "claude-sonnet-4-5",
"name": "Claude Sonnet 4.5",
"cost_per_1m_in": 3.75,
"cost_per_1m_out": 18.75,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 198000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "claude-sonnet-4-6",
"name": "Claude Sonnet 4.6",
"cost_per_1m_in": 3.6,
"cost_per_1m_out": 18,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 1000000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "deepseek-v3.2",
"name": "DeepSeek V3.2",
"cost_per_1m_in": 0.33,
"cost_per_1m_out": 0.48,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 160000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "zai-org-glm-4.6",
"name": "GLM 4.6",
"cost_per_1m_in": 0.85,
"cost_per_1m_out": 2.75,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 198000,
"default_max_tokens": 32768,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "zai-org-glm-4.7",
"name": "GLM 4.7",
"cost_per_1m_in": 0.55,
"cost_per_1m_out": 2.65,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 198000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "zai-org-glm-4.7-flash",
"name": "GLM 4.7 Flash",
"cost_per_1m_in": 0.125,
"cost_per_1m_out": 0.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "olafangensan-glm-4.7-flash-heretic",
"name": "GLM 4.7 Flash Heretic",
"cost_per_1m_in": 0.14,
"cost_per_1m_out": 0.8,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 200000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "zai-org-glm-5",
"name": "GLM 5",
"cost_per_1m_in": 1,
"cost_per_1m_out": 3.2,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 198000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "openai-gpt-4o-2024-11-20",
"name": "GPT-4o",
"cost_per_1m_in": 3.125,
"cost_per_1m_out": 12.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": false,
"supports_attachments": true,
"options": {}
},
{
"id": "openai-gpt-4o-mini-2024-07-18",
"name": "GPT-4o Mini",
"cost_per_1m_in": 0.1875,
"cost_per_1m_out": 0.75,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": false,
"supports_attachments": true,
"options": {}
},
{
"id": "openai-gpt-52",
"name": "GPT-5.2",
"cost_per_1m_in": 2.19,
"cost_per_1m_out": 17.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "openai-gpt-52-codex",
"name": "GPT-5.2 Codex",
"cost_per_1m_in": 2.19,
"cost_per_1m_out": 17.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "openai-gpt-53-codex",
"name": "GPT-5.3 Codex",
"cost_per_1m_in": 2.19,
"cost_per_1m_out": 17.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 400000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "openai-gpt-54",
"name": "GPT-5.4",
"cost_per_1m_in": 3.13,
"cost_per_1m_out": 18.8,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 1000000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "openai-gpt-54-pro",
"name": "GPT-5.4 Pro",
"cost_per_1m_in": 37.5,
"cost_per_1m_out": 225,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 1000000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "gemini-3-flash-preview",
"name": "Gemini 3 Flash Preview",
"cost_per_1m_in": 0.7,
"cost_per_1m_out": 3.75,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "gemini-3-1-pro-preview",
"name": "Gemini 3.1 Pro Preview",
"cost_per_1m_in": 2.5,
"cost_per_1m_out": 15,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 1000000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "google-gemma-3-27b-it",
"name": "Google Gemma 3 27B Instruct",
"cost_per_1m_in": 0.12,
"cost_per_1m_out": 0.2,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 198000,
"default_max_tokens": 32768,
"can_reason": false,
"supports_attachments": true,
"options": {}
},
{
"id": "grok-41-fast",
"name": "Grok 4.1 Fast",
"cost_per_1m_in": 0.25,
"cost_per_1m_out": 0.625,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 1000000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "grok-4-20-beta",
"name": "Grok 4.20 Beta",
"cost_per_1m_in": 2.5,
"cost_per_1m_out": 7.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 2000000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "grok-4-20-multi-agent-beta",
"name": "Grok 4.20 Multi-Agent Beta",
"cost_per_1m_in": 2.5,
"cost_per_1m_out": 7.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 2000000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "grok-code-fast-1",
"name": "Grok Code Fast 1",
"cost_per_1m_in": 0.25,
"cost_per_1m_out": 1.87,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "kimi-k2-thinking",
"name": "Kimi K2 Thinking",
"cost_per_1m_in": 0.75,
"cost_per_1m_out": 3.2,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "kimi-k2-5",
"name": "Kimi K2.5",
"cost_per_1m_in": 0.56,
"cost_per_1m_out": 3.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "hermes-3-llama-3.1-405b",
"name": "Hermes 3 Llama 3.1 405b",
"cost_per_1m_in": 1.1,
"cost_per_1m_out": 3,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 16384,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "llama-3.2-3b",
"name": "Llama 3.2 3B",
"cost_per_1m_in": 0.15,
"cost_per_1m_out": 0.6,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "llama-3.3-70b",
"name": "Llama 3.3 70B",
"cost_per_1m_in": 0.7,
"cost_per_1m_out": 2.8,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "minimax-m21",
"name": "MiniMax M2.1",
"cost_per_1m_in": 0.35,
"cost_per_1m_out": 1.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 198000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "minimax-m25",
"name": "MiniMax M2.5",
"cost_per_1m_in": 0.34,
"cost_per_1m_out": 1.19,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 198000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "mistral-small-3-2-24b-instruct",
"name": "Mistral Small 3.2 24B Instruct",
"cost_per_1m_in": 0.09375,
"cost_per_1m_out": 0.25,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "nvidia-nemotron-3-nano-30b-a3b",
"name": "NVIDIA Nemotron 3 Nano 30B",
"cost_per_1m_in": 0.075,
"cost_per_1m_out": 0.3,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "openai-gpt-oss-120b",
"name": "OpenAI GPT OSS 120B",
"cost_per_1m_in": 0.07,
"cost_per_1m_out": 0.3,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "qwen3-235b-a22b-instruct-2507",
"name": "Qwen 3 235B A22B Instruct 2507",
"cost_per_1m_in": 0.15,
"cost_per_1m_out": 0.75,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "qwen3-235b-a22b-thinking-2507",
"name": "Qwen 3 235B A22B Thinking 2507",
"cost_per_1m_in": 0.45,
"cost_per_1m_out": 3.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "qwen3-coder-480b-a35b-instruct-turbo",
"name": "Qwen 3 Coder 480B Turbo",
"cost_per_1m_in": 0.35,
"cost_per_1m_out": 1.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "qwen3-coder-480b-a35b-instruct",
"name": "Qwen 3 Coder 480b",
"cost_per_1m_in": 0.75,
"cost_per_1m_out": 3,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "qwen3-next-80b",
"name": "Qwen 3 Next 80b",
"cost_per_1m_in": 0.35,
"cost_per_1m_out": 1.9,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "qwen3-5-35b-a3b",
"name": "Qwen 3.5 35B A3B",
"cost_per_1m_in": 0.3125,
"cost_per_1m_out": 1.25,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {
"temperature": 1,
"top_p": 0.95
}
},
{
"id": "qwen3-5-9b",
"name": "Qwen 3.5 9B",
"cost_per_1m_in": 0.05,
"cost_per_1m_out": 0.15,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": true,
"options": {}
},
{
"id": "qwen3-vl-235b-a22b",
"name": "Qwen3 VL 235B",
"cost_per_1m_in": 0.25,
"cost_per_1m_out": 1.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 256000,
"default_max_tokens": 32768,
"can_reason": false,
"supports_attachments": true,
"options": {}
},
{
"id": "mistral-31-24b",
"name": "Venice Medium",
"cost_per_1m_in": 0.5,
"cost_per_1m_out": 2,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": false,
"supports_attachments": true,
"options": {}
},
{
"id": "venice-uncensored",
"name": "Venice Uncensored 1.1",
"cost_per_1m_in": 0.2,
"cost_per_1m_out": 0.9,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 32000,
"default_max_tokens": 8000,
"can_reason": false,
"supports_attachments": false,
"options": {}
},
{
"id": "venice-uncensored-role-play",
"name": "Venice Role Play Uncensored",
"cost_per_1m_in": 0.5,
"cost_per_1m_out": 2,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32000,
"can_reason": false,
"supports_attachments": true,
"options": {}
},
{
"id": "qwen3-4b",
"name": "Venice Small",
"cost_per_1m_in": 0.05,
"cost_per_1m_out": 0.15,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 32000,
"default_max_tokens": 8000,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "minimax-m27",
"name": "MiniMax M2.7",
"cost_per_1m_in": 0.375,
"cost_per_1m_out": 1.5,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 198000,
"default_max_tokens": 32768,
"can_reason": true,
"reasoning_levels": [
"low",
"medium",
"high"
],
"default_reasoning_effort": "medium",
"supports_attachments": false,
"options": {}
},
{
"id": "aion-labs.aion-2-0",
"name": "Aion 2.0",
"cost_per_1m_in": 1,
"cost_per_1m_out": 2,
"cost_per_1m_in_cached": 0,
"cost_per_1m_out_cached": 0,
"context_window": 128000,
"default_max_tokens": 32768,
"can_reason": false,
"supports_attachments": false,
"options": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment