Created
March 16, 2026 20:34
-
-
Save manmal/36fa6cbac096ad4f385ab0244cccaefc to your computer and use it in GitHub Desktop.
Qwen 3.5 35B-A3B model provider extension
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
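/**
 * vllm-mlx Local Provider Extension
 *
 * Registers a "vllm-local" provider whose base URL is built from the
 * VLLM_HOST and VLLM_PORT environment variables (defaulting to
 * http://localhost and 8000), limits the active tools at session start, and
 * supplies a custom system prompt before each agent start.
 */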
| import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; | |
/**
 * Resolves the server host (scheme + hostname) from a raw environment value.
 *
 * Surrounding whitespace and trailing slashes are removed so that appending
 * `:<port>` never produces `http://host/:8000`. A missing, empty, or
 * slash-only value falls back to `http://localhost`.
 *
 * @param raw - Raw value of the host environment variable, if any.
 * @returns The normalized host without a trailing slash.
 */
function normalizeHost(raw: string | undefined): string {
  const stripped = (raw ?? "").trim().replace(/\/+$/, "");
  return stripped === "" ? "http://localhost" : stripped;
}

/**
 * Resolves the server port from a raw environment value.
 *
 * `??` alone would accept an empty string (e.g. `VLLM_PORT=`), which yields a
 * URL with a dangling colon; empty and whitespace-only values are therefore
 * treated the same as an unset variable.
 *
 * @param raw - Raw value of the port environment variable, if any.
 * @returns The trimmed port, or `"8000"` when none was provided.
 */
function normalizePort(raw: string | undefined): string {
  const trimmed = (raw ?? "").trim();
  return trimmed === "" ? "8000" : trimmed;
}

/** Host of the local server, taken from `VLLM_HOST` (default `http://localhost`). */
const VLLM_HOST = normalizeHost(process.env.VLLM_HOST);

/** Port of the local server, taken from `VLLM_PORT` (default `8000`). */
const VLLM_PORT = normalizePort(process.env.VLLM_PORT);

/** Base URL handed to the provider registration: `<host>:<port>/v1`. */
const BASE_URL = `${VLLM_HOST}:${VLLM_PORT}/v1`;
/**
 * Extension entry point.
 *
 * Performs three registrations on the supplied extension API:
 *  1. a "vllm-local" provider pointing at `BASE_URL` with a single Qwen model,
 *  2. a `session_start` handler that narrows the active tools to a fixed
 *     allow-list (only the entries that are actually registered),
 *  3. a `before_agent_start` handler that returns a custom system prompt.
 *
 * @param pi - Extension API object provided by the host application.
 */
export default function (pi: ExtensionAPI) {
  // 1. Provider registration. The apiKey value looks like a placeholder and
  //    authHeader is disabled — presumably because the local server does not
  //    require authentication (NOTE(review): confirm against the host API docs).
  pi.registerProvider("vllm-local", {
    baseUrl: BASE_URL,
    apiKey: "not-needed",
    authHeader: false,
    api: "openai-completions",
    models: [
      {
        id: "mlx-community/Qwen3.5-35B-A3B-4bit",
        name: "Qwen3.5 35B-A3B 4bit (local vllm-mlx)",
        reasoning: true,
        input: ["text", "image"],
        // All cost fields are zero for this locally served model.
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 32768,
        maxTokens: 4096,
        compat: {
          supportsDeveloperRole: false,
          supportsReasoningEffort: false,
          maxTokensField: "max_tokens",
          thinkingFormat: "qwen",
        },
      },
    ],
  });

  // 2. Activate only the allow-listed tools that are currently registered,
  //    preserving the allow-list order.
  pi.on("session_start", async () => {
    const registeredNames = new Set(pi.getAllTools().map((tool) => tool.name));
    const enabled: string[] = [];
    for (const toolName of ["lookup_barcode", "lookup_product", "get_nutrition_facts"]) {
      if (registeredNames.has(toolName)) {
        enabled.push(toolName);
      }
    }
    pi.setActiveTools(enabled);
  });

  // 3. Return the custom system prompt; the incoming event is not inspected.
  //    The literal below is a template placeholder to be filled in by the user.
  pi.on("before_agent_start", async (_event) => ({
    systemPrompt: `{YOUR_PROMPT_HERE}`,
  }));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment