PR #709 - re-validated and updated as of 4/11, 12:06 AM EST
$ HOST="https://maas.$(kubectl get ingress.config.openshift.io/cluster -o jsonpath='{.spec.domain}')"
$ TOKEN=$(oc whoami -t)
$ API_KEY=$(curl -sSk -X POST "$HOST/maas-api/v1/api-keys" \
-H "Authorization: Bearer $TOKEN" \
-H 'Content-Type: application/json' \
-d '{"name":"test","expiresIn":"2h"}' | jq -r '.key')
$ echo "API_KEY=$API_KEY"
API_KEY=sk-oai-12uqhPaRQuPsN565n_wssLFvGagBlDRlJ7GE8vUB3dASs1wFAp1744D68x4pi
$ curl -sSk "$HOST/llm/facebook-opt-125m-simulated/v1/chat/completions" \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d '{"model":"facebook/opt-125m","messages":[{"role":"user","content":"hello"}],"max_tokens":8}' | jq .
{
"id": "chatcmpl-7fe57e5f-e02b-5832-8eed-b879424e65b6",
"created": 1775880185,
"model": "facebook/opt-125m",
"usage": {
"prompt_tokens": 1,
"completion_tokens": 3,
"total_tokens": 4
},
"object": "chat.completion",
"kv_transfer_params": null,
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {
"role": "assistant",
"content": "Alas, poor "
}
}
]
}
$ curl -sSk "$HOST/llm/gpt-4o/v1/chat/completions" \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d '{"model":"gpt-4o","messages":[{"role":"user","content":"say hi in one word"}],"max_tokens":8}' | jq .
{
"id": "chatcmpl-DTJr7XVNLkz2uRD37buzfiOMq8cEm",
"object": "chat.completion",
"created": 1775880209,
"model": "gpt-4o-2024-08-06",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hi!",
"refusal": null,
"annotations": []
},
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 12,
"completion_tokens": 2,
"total_tokens": 14,
"prompt_tokens_details": {
"cached_tokens": 0,
"audio_tokens": 0
},
"completion_tokens_details": {
"reasoning_tokens": 0,
"audio_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
}
},
"service_tier": "default",
"system_fingerprint": "fp_c6907745f9"
}
$ kubectl get httproute gpt-4o -n llm -o jsonpath='{.spec.rules[0].matches[0].path.value}'
/llm/gpt-4o
$ kubectl get httproute gpt-4o -n llm -o jsonpath='{.metadata.ownerReferences[0].kind}'
ExternalModel
$ kubectl get httproute gpt-4o -n llm -o jsonpath='{.spec.rules[0].filters[0].type}'
RequestHeaderModifier