This is an ugly and minimally tested workaround for folks who would like to use PydanticAI response model formatting with vLLM prior to PR #13002 being merged into vLLM. I've tested this against PydanticAI 0.0.46:
First, apply this monkeypatch to PydanticAI. It allows agent runs to specify a named tool (via `tool_choice` in model settings):
# 2025-03-30 Patched 0.0.46
# pylint: disable=protected-access
async def patched_completions_create(
    self,
    messages: list[ModelMessage],
    stream: bool,
    model_settings: OpenAIModelSettings,
    model_request_parameters: ModelRequestParameters,
) -> chat.ChatCompletion | AsyncStream[ChatCompletionChunk]:
    """Patched ``OpenAIModel._completions_create`` honoring a user ``tool_choice``.

    Identical to the upstream 0.0.46 implementation except that an explicit
    ``model_settings["tool_choice"]`` (e.g. a named-function dict) is passed
    through to the API verbatim. When no override is given, the upstream
    default applies: ``"required"`` when a plain-text result is not allowed,
    otherwise ``"auto"``.

    Args:
        messages: Conversation history to send to the model.
        stream: Whether to request a streaming response.
        model_settings: Per-run settings; may carry a ``tool_choice`` override.
        model_request_parameters: Tool definitions and result-handling flags.

    Returns:
        The completed ``ChatCompletion``, or an ``AsyncStream`` of chunks
        when ``stream`` is true.

    Raises:
        ModelHTTPError: For API responses with status >= 400 (chained from
            the underlying ``APIStatusError``).
    """
    tools = self._get_tools(model_request_parameters)

    tool_choice: str | None = None
    if model_request_parameters.function_tools or model_request_parameters.result_tools:
        if model_settings.get("tool_choice") is not None:
            # Explicit override from the caller wins (this is the patch's
            # whole purpose — upstream ignores this key).
            tool_choice = model_settings["tool_choice"]  # type: ignore
        elif not model_request_parameters.allow_text_result:
            # A tool call is mandatory when free text is not an acceptable result.
            tool_choice = "required"
        else:
            tool_choice = "auto"

    # _map_message yields zero or more OpenAI-format messages per input message.
    openai_messages: list[chat.ChatCompletionMessageParam] = []
    for m in messages:
        async for msg in self._map_message(m):
            openai_messages.append(msg)

    try:
        return await self.client.chat.completions.create(
            model=self._model_name,
            messages=openai_messages,
            n=1,
            parallel_tool_calls=model_settings.get("parallel_tool_calls", NOT_GIVEN),
            tools=tools or NOT_GIVEN,
            tool_choice=tool_choice or NOT_GIVEN,
            stream=stream,
            stream_options={"include_usage": True} if stream else NOT_GIVEN,
            max_completion_tokens=model_settings.get("max_tokens", NOT_GIVEN),
            temperature=model_settings.get("temperature", NOT_GIVEN),
            top_p=model_settings.get("top_p", NOT_GIVEN),
            timeout=model_settings.get("timeout", NOT_GIVEN),
            seed=model_settings.get("seed", NOT_GIVEN),
            presence_penalty=model_settings.get("presence_penalty", NOT_GIVEN),
            frequency_penalty=model_settings.get("frequency_penalty", NOT_GIVEN),
            logit_bias=model_settings.get("logit_bias", NOT_GIVEN),
            reasoning_effort=model_settings.get("openai_reasoning_effort", NOT_GIVEN),
            user=model_settings.get("user", NOT_GIVEN),
        )
    except APIStatusError as e:
        # Wrap HTTP-level failures in PydanticAI's error type; re-raise anything else.
        if (status_code := e.status_code) >= 400:
            raise ModelHTTPError(
                status_code=status_code, model_name=self.model_name, body=e.body
            ) from e
        raise
# Apply Monkeypatch
# NOTE(review): this replaces a *private* method on OpenAIModel at import
# time; it is pinned against PydanticAI 0.0.46 and must be re-verified
# (or removed) after any PydanticAI upgrade or once vLLM PR #13002 lands.
OpenAIModel._completions_create = patched_completions_create # type: ignore
Example of using a named tool call to force PydanticAI to parse the final result correctly:
# Run a formatting agent over the prior conversation, forcing the model to
# call the `final_result` tool so PydanticAI parses the reply into
# `StorySetting` rather than returning free text.
setting_format_result = await Agent(name="Format Story Setting").run(
    user_prompt=prompt(PYDANTICAI_FORMAT_LAST_OUTPUT_PROMPT),
    model=ctx.deps.llm,
    # Reuse the full message history from the earlier generation step.
    message_history=setting_generation_result.all_messages(),
    model_settings={ # type: ignore
        **ctx.deps.llm_defaults, # type: ignore
        # Neutralize penalties for the pure-formatting pass.
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
        # Named tool call — only honored because of the monkeypatch applied
        # earlier in this document; upstream 0.0.46 ignores this key.
        "tool_choice": {
            "type": "function",
            "function": {"name": "final_result"},
        },
    },
    result_type=StorySetting,
)