@theobjectivedad
Created April 1, 2025 15:59
PydanticAI to VLLM Tool Calling Monkeypatch

This is an ugly and minimally tested workaround for folks who would like to use PydanticAI's structured result formatting with vLLM before PR #13002 is merged into vLLM. I've tested this against PydanticAI 0.0.46.
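
For context, here is a minimal sketch of pointing PydanticAI at a vLLM server. It assumes vLLM is serving its OpenAI-compatible API on localhost:8000 and uses the OpenAIProvider API available in PydanticAI 0.0.46; the model name and URL are placeholders:

from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.openai import OpenAIProvider

# Placeholder model name: use whatever model your vLLM instance is serving.
llm = OpenAIModel(
    "meta-llama/Llama-3.1-8B-Instruct",
    provider=OpenAIProvider(
        base_url="http://localhost:8000/v1",
        # vLLM ignores the API key unless the server was started with --api-key.
        api_key="unused",
    ),
)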

First, apply this monkeypatch to PydanticAI. It lets an agent run specify a named tool through the tool_choice key in model settings:

# 2025-03-30 Patched against PydanticAI 0.0.46
# pylint: disable=protected-access
# Import locations assume PydanticAI 0.0.46 and the openai v1 SDK.
from openai import NOT_GIVEN, APIStatusError, AsyncStream
from openai.types import chat
from openai.types.chat import ChatCompletionChunk
from pydantic_ai.exceptions import ModelHTTPError
from pydantic_ai.messages import ModelMessage
from pydantic_ai.models import ModelRequestParameters
from pydantic_ai.models.openai import OpenAIModel, OpenAIModelSettings


async def patched_completions_create(
    self,
    messages: list[ModelMessage],
    stream: bool,
    model_settings: OpenAIModelSettings,
    model_request_parameters: ModelRequestParameters,
) -> chat.ChatCompletion | AsyncStream[ChatCompletionChunk]:
    # This mirrors the original implementation, with the tool_choice
    # handling added below.
    tools = self._get_tools(model_request_parameters)

    # TODO: allow users to override tool_choice more cleanly
    tool_choice: str | dict | None = None
    if (
        len(model_request_parameters.function_tools) > 0
        or len(model_request_parameters.result_tools) > 0
    ):
        if (
            "tool_choice" in model_settings
            and model_settings["tool_choice"] is not None
        ):
            # Honor an explicit tool_choice passed via model settings; this
            # may be a string ("auto", "required") or a named-function dict.
            tool_choice = model_settings["tool_choice"]  # type: ignore
        elif not model_request_parameters.allow_text_result:
            # Default behavior: a tool call is mandatory when plain-text
            # results are not allowed.
            tool_choice = "required"
        else:
            tool_choice = "auto"

    openai_messages: list[chat.ChatCompletionMessageParam] = []
    for m in messages:
        async for msg in self._map_message(m):
            openai_messages.append(msg)

    # Call the OpenAI-compatible endpoint with the resolved tool_choice.
    try:
        return await self.client.chat.completions.create(
            model=self._model_name,
            messages=openai_messages,
            n=1,
            parallel_tool_calls=model_settings.get("parallel_tool_calls", NOT_GIVEN),
            tools=tools or NOT_GIVEN,
            tool_choice=tool_choice or NOT_GIVEN,
            stream=stream,
            stream_options={"include_usage": True} if stream else NOT_GIVEN,
            max_completion_tokens=model_settings.get("max_tokens", NOT_GIVEN),
            temperature=model_settings.get("temperature", NOT_GIVEN),
            top_p=model_settings.get("top_p", NOT_GIVEN),
            timeout=model_settings.get("timeout", NOT_GIVEN),
            seed=model_settings.get("seed", NOT_GIVEN),
            presence_penalty=model_settings.get("presence_penalty", NOT_GIVEN),
            frequency_penalty=model_settings.get("frequency_penalty", NOT_GIVEN),
            logit_bias=model_settings.get("logit_bias", NOT_GIVEN),
            reasoning_effort=model_settings.get("openai_reasoning_effort", NOT_GIVEN),
            user=model_settings.get("user", NOT_GIVEN),
        )
    except APIStatusError as e:
        if (status_code := e.status_code) >= 400:
            raise ModelHTTPError(
                status_code=status_code, model_name=self.model_name, body=e.body
            ) from e
        raise


# Apply Monkeypatch
OpenAIModel._completions_create = patched_completions_create  # type: ignore
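
Note that this overwrites the private _completions_create method, so re-check the patch whenever you upgrade PydanticAI.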

Example of using a named tool call to force PydanticAI to parse the final result correctly. By default, PydanticAI registers its result tool under the name final_result, which is why that name appears in tool_choice:

setting_format_result = await Agent(name="Format Story Setting").run(
    user_prompt=prompt(PYDANTICAI_FORMAT_LAST_OUTPUT_PROMPT),
    model=ctx.deps.llm,
    message_history=setting_generation_result.all_messages(),
    model_settings={  # type: ignore
        **ctx.deps.llm_defaults,  # type: ignore
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
        "tool_choice": {
            "type": "function",
            "function": {"name": "final_result"},
        },
    },
    result_type=StorySetting,
)
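
For a self-contained version of the same pattern, here is a minimal sketch; the result model and prompt are illustrative, and llm is the vLLM-backed OpenAIModel from the connection sketch above:

from pydantic import BaseModel
from pydantic_ai import Agent


class StorySetting(BaseModel):
    # Illustrative result schema; replace with your own fields.
    era: str
    location: str


agent = Agent(llm, result_type=StorySetting, name="Format Story Setting")
result = agent.run_sync(
    "Describe the story setting.",
    model_settings={  # type: ignore
        # Force the model to call the result tool PydanticAI registered.
        "tool_choice": {
            "type": "function",
            "function": {"name": "final_result"},
        },
    },
)
print(result.data)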