Created
October 10, 2025 10:05
-
-
Save limcheekin/3c67cd8ce6cc411e3e69a8846529fd4d to your computer and use it in GitHub Desktop.
TensorZero Configs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
services:
  clickhouse:
    image: clickhouse/clickhouse-server:25.8-alpine
    ports:
      - "8123:8123"
    environment:
      - 'CLICKHOUSE_USER=${CLICKHOUSE_USER:?Environment variable CLICKHOUSE_USER must be set.}'
      # Fixed: this error message previously referenced CLICKHOUSE_USER (copy-paste bug).
      - 'CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD:?Environment variable CLICKHOUSE_PASSWORD must be set.}'

  gateway:
    depends_on:
      - clickhouse
    # NOTE(review): no tag pinned — consider pinning a version for reproducible deploys.
    image: tensorzero/gateway
    volumes:
      - '/home/limcheekin/docker/volumes/tensorzero/config:/app/config:ro'
    command: '--config-file /app/config/tensorzero.toml'
    environment:
      - 'TENSORZERO_CLICKHOUSE_URL=${TENSORZERO_CLICKHOUSE_URL:?Environment variable TENSORZERO_CLICKHOUSE_URL must be set.}'
      # Both keys default to a placeholder so the stack starts without real credentials.
      - 'LOCALAI_API_KEY=${LOCALAI_API_KEY:-sk-1}'
      - 'OPENAI_API_KEY=${OPENAI_API_KEY:-sk-1}'
    ports:
      - '5002:3000'
    extra_hosts:
      # Lets the container reach services on the Docker host by name.
      - 'host.docker.internal:host-gateway'

  ui:
    depends_on:
      - clickhouse
    # NOTE(review): no tag pinned — consider pinning a version for reproducible deploys.
    image: tensorzero/ui
    volumes:
      - '/home/limcheekin/docker/volumes/tensorzero/config:/app/config:ro'
    environment:
      - 'LOCALAI_API_KEY=${LOCALAI_API_KEY:-sk-1}'
      - 'TENSORZERO_CLICKHOUSE_URL=${TENSORZERO_CLICKHOUSE_URL:?Environment variable TENSORZERO_CLICKHOUSE_URL must be set.}'
      # Defaults to the gateway service on the compose network.
      - 'TENSORZERO_GATEWAY_URL=${TENSORZERO_GATEWAY_URL:-http://gateway:3000}'
    ports:
      - '5003:4000'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# config/tensorzero.toml

# Model & provider: a local OpenAI-compatible endpoint serving Granite 4.0-H Tiny.
[models.granite_4_h_tiny]
routing = ["local_ai"]

[models.granite_4_h_tiny.providers.local_ai]
type = "openai"
api_base = "http://192.168.1.111:8880/v1"
model_name = "ibm-granite_granite-4.0-h-tiny"
# Key is read from the environment at request time.
api_key_location = "env::LOCALAI_API_KEY"
# Forward extra JSON fields to the provider; enables server-side prompt caching.
extra_body = [
    { pointer = "/cache_prompt", value = true },
]

# Function & variant: this function is the 'model' you select in Open WebUI.
[functions.granite_4_h]
type = "chat"

[functions.granite_4_h.variants.local_ai]
type = "chat_completion"
model = "granite_4_h_tiny"
weight = 100
# config/tensorzero.toml
# Minimal TensorZero config for calling OpenAI GPT-4.1.
# - Ensure OPENAI_API_KEY is set in the environment (or change api_key_location).
# - The model section name (gpt_4_1) is used as "tensorzero::model_name::gpt_4_1".

[models.gpt_4_1]
routing = ["openai"]

[models.gpt_4_1.providers.openai]
type = "openai"
# Official OpenAI model id for GPT-4.1.
model_name = "gpt-4.1"
# Reads the OPENAI_API_KEY environment variable by default.
api_key_location = "env::OPENAI_API_KEY"
# Optional: set api_base if you route through a proxy or Azure.
# api_base = "https://api.openai.com/v1"

# A simple chat "function" that callers can invoke via the TensorZero gateway.
[functions.chat_gpt4_1]
type = "chat"
description = "Default chat function using OpenAI gpt-4.1"

[functions.chat_gpt4_1.variants.default]
type = "chat_completion"
model = "gpt_4_1"
# Default request-time overrides for this variant.
# These can still be overridden by callers (preferred).
temperature = 0.0  # keep replies focused & deterministic — tweak as needed
top_p = 0.95
max_tokens = 2048  # output token cap; tune per your app
presence_penalty = 0.0
frequency_penalty = 0.0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment