Demo script for session at Red Hat Summit 2026
-
MaaS is deployed. Payload processing is deployed
-
Apply MaaS CRs from local files.
# Apply the sample MaaS custom resources from a local checkout of the
# models-as-a-service repository.
# The placeholder lives inside a quoted string so the shell cannot parse
# "<your-path>" as input/output redirections. Set MAAS_REPO (or edit the
# placeholder) to point at your clone.
MAAS_REPO="${MAAS_REPO:-<your-path>/models-as-a-service}"
# Chain on the cd so the relative kustomize path is never resolved against
# whatever directory the shell happened to be in.
cd "$MAAS_REPO" &&
  kubectl create namespace llm --dry-run=client -o yaml | kubectl apply -f - &&
  kustomize build docs/samples/maas-system | kubectl apply -f -
- Grab the MaaS host.
# Read the cluster ingress domain, then prefix it to form the MaaS base URL.
ingress_domain="$(kubectl get ingresses.config.openshift.io cluster --output=jsonpath='{.spec.domain}')"
MHOST="https://maas.${ingress_domain}"
- Expected: 200 with the free model
# Query the MaaS models endpoint with the current OpenShift session token and
# pretty-print the JSON response. The URL is quoted so the value of MHOST is
# never word-split or glob-expanded.
curl -sSk "${MHOST}/maas-api/v1/models" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $(oc whoami -t)" | jq -r .
- Expected: an API key of the form sk-oai-xxxxxxxxxx
# Create an API key for the "simulator-subscription" subscription and keep it
# in API_KEY for the requests that follow.
# `.key // empty` makes a response without a key yield an empty string rather
# than the literal text "null", which would otherwise be sent as a bearer token.
API_KEY=$(curl -sSk \
  -H "Authorization: Bearer $(oc whoami -t)" \
  -H "Content-Type: application/json" \
  -X POST \
  -d '{
"name": "my-permanent-key",
"description": "Production API key for my application",
"subscription": "simulator-subscription"
}' \
  "${MHOST}/maas-api/v1/api-keys" | jq -r '.key // empty')
# Surface a failed key creation instead of silently continuing.
if [ -z "$API_KEY" ]; then
  printf 'warning: no API key returned by %s\n' "${MHOST}/maas-api/v1/api-keys" >&2
fi
printf '%s\n' "$API_KEY"
- Expected: 200 with the free subscription
# Ask the MaaS API to select the subscription for the given user and groups.
# Uses the same -sSk flags as the other ${MHOST} calls: no progress meter,
# errors still shown, and TLS verification skipped for the cluster's ingress
# certificate.
# Replace <your-username> in the payload before running.
curl -sSk -X POST "${MHOST}/maas-api/internal/v1/subscriptions/select" \
  -H 'Content-Type: application/json' \
  -H "Authorization: Bearer $API_KEY" \
  -d '{
"username": "<your-username>",
"groups": ["system:authenticated"],
"requestedSubscription": "simulator-subscription"
}'
- Case 1: free model, no auth provided - expected 401
- Case 2: free model, free API key provided - expected 200
- Case 3: premium model, free API key provided - expected 403
# Send one chat-completion request to a model route and print only the HTTP
# status code.
#   $1 - model route name under ${MHOST}/llm/
#   $2 - optional bearer token; when the argument is omitted, no Authorization
#        header is sent
chat_status() {
  local route=$1
  local -a auth_header=()
  if (( $# > 1 )); then
    auth_header=(-H "Authorization: Bearer $2")
  fi
  curl -sSk -o /dev/null -w "%{http_code}\n" \
    "${MHOST}/llm/${route}/v1/chat/completions" \
    "${auth_header[@]}" \
    -H "Content-Type: application/json" -d '{
"model":"facebook/opt-125m",
"messages":[{"role":"user","content":"Hi"}],
"max_tokens":5}'
}

# Case 1: free model route, no Authorization header.
chat_status facebook-opt-125m-simulated
# Case 2: free model route, API key supplied.
chat_status facebook-opt-125m-simulated "$API_KEY"
# Case 3: premium model route, same API key supplied.
chat_status premium-simulated-simulated-premium "$API_KEY"
- Expected: a few 200s followed by 429s
# Send 16 identical chat-completion requests one after another, printing one
# HTTP status code per request.
for ((attempt = 1; attempt <= 16; attempt++)); do
  curl -sSk -o /dev/null -w "%{http_code}\n" \
    "${MHOST}/llm/facebook-opt-125m-simulated/v1/chat/completions" \
    -H "Authorization: Bearer $API_KEY" \
    -H "Content-Type: application/json" -d '{
"model":"facebook/opt-125m",
"messages":[{"role":"user","content":"Hi"}],
"max_tokens":50}'
done
- API keys created and available for all external model providers of interest (OpenAI, Anthropic, AWS Bedrock, etc.)
- Payload processing module deployed (handled by RHOAI 3.4 installer)
# Create the Secret holding the OpenAI provider credential in the llm
# namespace. Replace the "sk-proj-..." placeholder with a real key first.
# The YAML nesting is restored so name/namespace/labels sit under metadata and
# api-key sits under stringData. The quoted 'EOF' delimiter stops the shell
# from expanding anything inside the manifest.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Secret
metadata:
  name: openai-api-key
  namespace: llm
  labels:
    inference.networking.k8s.io/bbr-managed: "true"
type: Opaque
stringData:
  api-key: "sk-proj-..."
EOF
# Register gpt-4o as an ExternalModel that references the openai-api-key
# Secret, plus a MaaSModelRef pointing at that ExternalModel.
# The YAML nesting is restored so each field sits under its parent key. The
# quoted 'EOF' delimiter stops the shell from expanding the manifest.
kubectl apply -f - <<'EOF'
apiVersion: maas.opendatahub.io/v1alpha1
kind: ExternalModel
metadata:
  name: gpt-4o
  namespace: llm
spec:
  provider: openai
  targetModel: gpt-4o
  endpoint: api.openai.com
  credentialRef:
    name: openai-api-key
---
apiVersion: maas.opendatahub.io/v1alpha1
kind: MaaSModelRef
metadata:
  name: gpt-4o
  namespace: llm
spec:
  modelRef:
    kind: ExternalModel
    name: gpt-4o
EOF
# List the model references in the llm namespace.
kubectl get maasmodelref -n llm
The output should show the three models in the Ready phase, similar to the following:
% kubectl get maasmodelref -n llm
*NAME PHASE ENDPOINT HTTPROUTE GATEWAY AGE*
*facebook-opt-125m-simulated Ready https://maas.apps.ocp.zw8s9.sandbox2989.opentlc.com/llm/facebook-opt-125m-simulated facebook-opt-125m-simulated-kserve-route maas-default-gateway 2d22h*
*gpt-4o Ready https://maas.apps.ocp.zw8s9.sandbox2989.opentlc.com/llm/gpt-4o gpt-4o maas-default-gateway 17h*
*premium-simulated-simulated-premium Ready https://maas.apps.ocp.zw8s9.sandbox2989.opentlc.com/llm/premium-simulated-simulated-premium premium-simulated-simulated-premium-kserve-route maas-default-gateway 2d22h*
# Allow the system:authenticated group to use the gpt-4o model, and create a
# subscription for that group with a token rate limit.
# The YAML nesting is restored so list items and their fields sit under their
# parent keys.
# NOTE(review): tokenRateLimits is placed under the modelRefs entry — confirm
# against the MaaSSubscription CRD.
kubectl apply -f - <<'EOF'
apiVersion: maas.opendatahub.io/v1alpha1
kind: MaaSAuthPolicy
metadata:
  name: gpt-4o-access
  namespace: models-as-a-service
spec:
  modelRefs:
    - name: gpt-4o
      namespace: llm
  subjects:
    groups:
      - name: "system:authenticated"
---
apiVersion: maas.opendatahub.io/v1alpha1
kind: MaaSSubscription
metadata:
  name: gpt-4o-subscription
  namespace: models-as-a-service
spec:
  owner:
    groups:
      - name: "system:authenticated"
  modelRefs:
    - name: gpt-4o
      namespace: llm
      tokenRateLimits:
        - limit: 100000
          window: "1h"
EOF
# OpenShift session token used to authenticate against the MaaS API.
TOKEN="$(oc whoami -t)"
# Hostname of the first listener on the maas-default-gateway Gateway.
GW_HOST="$(kubectl get gateway maas-default-gateway --namespace openshift-ingress \
  --output jsonpath='{.spec.listeners[0].hostname}')"
# Request an API key for the gpt-4o subscription and extract its .key field.
AKEY2="$(
  curl --silent --show-error --request POST "https://${GW_HOST}/maas-api/v1/api-keys" \
    --header "Authorization: Bearer $TOKEN" \
    --header "Content-Type: application/json" \
    --data '{"name":"external-model-key","subscription":"gpt-4o-subscription"}' |
    jq -r '.key'
)"
# Print only the first 20 characters of the key.
echo "MaaS API key: ${AKEY2:0:20}..."
Note: in the example above, the MaaS API key was derived for a specific gpt-4o subscription, but in general it would be derived for a subscription that includes many models and offerings.
# Directly against the provider, using the provider's own API key.
# TLS verification stays enabled (no -k): this request carries a real
# credential to a public endpoint, so the server certificate must be checked.
# -S keeps error messages visible while -s hides the progress meter.
# Replace <YOUR_API_KEY> before running.
curl -sS "https://api.openai.com/v1/chat/completions" \
  -H "Authorization: Bearer <YOUR_API_KEY>" \
  -H "Content-Type: application/json" \
  -d '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, give me an interesting fact about LLMs"}],"max_tokens":100}'
# Through MaaS gateway: the same chat-completion payload, sent to the gpt-4o
# route on the gateway host and authenticated with the MaaS API key in AKEY2.
curl --silent --show-error \
  --header "Content-Type: application/json" \
  --header "Authorization: Bearer $AKEY2" \
  --data '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, give me an interesting fact about LLMs"}],"max_tokens":100}' \
  "https://${GW_HOST}/llm/gpt-4o/v1/chat/completions"