Last active
May 30, 2025 00:05
Gist: vgel/b904f39daad9bff806eb4b433be35b7b
// curl -H "Authorization: Bearer ..." "https://$BASE_URL/models/features"
{
  "host_fingerprint": "provider", // should be a unique string for the host or router (e.g. deepseek, openrouter, litellm)
  "data": [
    {
      // Required
      "id": "provider/model",
      "name": "Provider: Model",
      // should either match host_fingerprint if this is a primary provider (i.e. deepseek), or match the host_fingerprint
      // of the upstream provider if this is a router. i.e., this string should never be "litellm"
      "provider": "provider",
      "created": 1690502400,
      "input_modalities": ["text", "image", "audio"],
      "output_modalities": ["text", "image"],
      "quantization": "fp8",
      "context_length": 1000000,
      "max_output_length": 128000,
      "pricing": {
        "text": {
          "input_token": "0.000008",
          "output_token": "0.000024"
        },
        "image": {
          "input_token": "0",
          "output_token": "0"
        },
        "audio": {
          "input_token": "0"
        }
        // should we also have cache here? or as a multiplier? or per-modality?
      },
      "supported_sampling_parameters": ["temperature", "stop"],
      "supported_features": [
        "tools",
        "json_mode",
        "structured_outputs",
        "web_search",
        "reasoning",
        "logprobs",
        "assistant_prefix"
      ],
      // e.g., if a chat model can also be queried in completion mode. base models would only support "completions"
      "supported_endpoints": ["completions", "chat_completions"],
      "max_logprobs": 20,
      // Optional extra data, i.e. provider-specific, or for openrouter
      "extra": {
        "openrouter": {}
      }
    }
  ]
}
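A minimal sketch of how a client might consume the pricing object: the `model` dict below copies the example values from the schema above, and `request_cost` is a hypothetical helper, not part of the proposal. The per-token prices are strings, which suggests clients should use exact decimal arithmetic rather than floats.

```python
from decimal import Decimal

# Hypothetical response fragment matching the schema above; prices are
# the example values from the gist, not any real provider's pricing.
model = {
    "id": "provider/model",
    "pricing": {
        "text": {"input_token": "0.000008", "output_token": "0.000024"},
        "image": {"input_token": "0", "output_token": "0"},
    },
}

def request_cost(model: dict, usage: dict) -> Decimal:
    """Sum per-modality token counts against string-typed per-token prices.

    usage looks like {"text": {"input": 1000, "output": 500}}.
    """
    total = Decimal(0)
    for modality, counts in usage.items():
        prices = model["pricing"].get(modality, {})
        for direction, tokens in counts.items():
            # Missing modality/direction entries are treated as free.
            price = Decimal(prices.get(f"{direction}_token", "0"))
            total += price * tokens
    return total

cost = request_cost(model, {"text": {"input": 1000, "output": 500}})
print(cost)  # 0.020000
```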
Ah, that's a good point. Hm.
Really price is a tensor of modality x reasoning x caching :-)
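One way to make that modality x reasoning x caching tensor concrete is a flat lookup keyed on all three axes, with a fallback to the standard price. This is just a sketch, not part of the gist's proposal; every key name and price below is made up:

```python
from decimal import Decimal

# Illustrative price "tensor" flattened to (modality, direction, variant)
# keys, where variant distinguishes standard, reasoning, and cached rates.
PRICES = {
    ("text", "input", "standard"): Decimal("0.000008"),
    ("text", "input", "cached"): Decimal("0.000002"),
    ("text", "output", "standard"): Decimal("0.000024"),
    ("text", "output", "reasoning"): Decimal("0.000048"),
}

def price_for(modality: str, direction: str, variant: str) -> Decimal:
    # Fall back to the standard rate when a variant has no override,
    # so providers only publish the cells that actually differ.
    return PRICES.get(
        (modality, direction, variant),
        PRICES[(modality, direction, "standard")],
    )

print(price_for("text", "output", "reasoning"))  # 0.000048
print(price_for("text", "input", "reasoning"))   # no override, falls back
```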
imo yeah, probably need to have caching, per modality. There's also the issue of variable pricing depending on context length, or depending on whether or not `reasoning` is being used. Examples: Vertex billing SKUs and 2.5 Flash pricing (but the Alibaba Qwen cloud API pricing also has variable reasoning pricing).