Gist ochafik/1c1dc3db94381bc72ffda78aba27e2c5 — save it to your computer and use it in GitHub Desktop.
llama.cpp tool usage / agent example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Usage: | |
| #! ./server -m some-model.gguf & | |
| #! pip install pydantic | |
| #! python examples/json-schema-pydantic-example.py | |
| # | |
| # TODO: | |
| # - https://github.com/NousResearch/Hermes-Function-Calling | |
| # | |
| # <|im_start|>system | |
| # You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags | |
| # You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: | |
| # <tools> {'type': 'function', 'function': {'name': 'get_stock_fundamentals', | |
| # 'description': 'get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\n\n Args:\n symbol (str): The stock symbol.\n\n Returns:\n dict: A dictionary containing fundamental data.', 'parameters': {'type': 'object', 'properties': {'symbol': {'type': 'string'}}, 'required': ['symbol']}}} | |
| # </tools> Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows: | |
| # <tool_call> | |
| # {'arguments': <args-dict>, 'name': <function-name>} | |
| # </tool_call><|im_end|> | |
| from dataclasses import dataclass | |
| import subprocess | |
| import sys | |
| from pydantic import BaseModel, TypeAdapter | |
| from annotated_types import MinLen | |
| from typing import Annotated, Callable, List, Union, Literal, Optional, Type, get_args, get_origin | |
| import json, requests | |
def type_to_str(t):
    """Render a type annotation as Python source text (e.g. 'list[int]')."""
    origin = get_origin(t)
    if origin is None:
        # Plain class (int, float, a pydantic model, ...): just its name.
        return t.__name__
    type_args = get_args(t)
    if not type_args:
        return origin.__name__
    rendered_args = ", ".join(type_to_str(arg) for arg in type_args)
    return f"{origin.__name__}[{rendered_args}]"
def build_tool_call_adapter(final_output_type, *tools):
    """Build a pydantic TypeAdapter for a generated `Response` model whose
    `next_step` is either a `FinalResult` wrapping final_output_type or one of
    the tool-call models derived from the given tool functions.

    NOTE(review): the model source is assembled from fn.__doc__ and
    annotations and run through exec() — a hostile docstring could inject
    code (see TODO below).
    """
    lines = [
        'from pydantic import BaseModel, TypeAdapter',
        'from typing import Literal, Union',
    ]
    # exec namespace: expose the final output type and (via locals()) the tools.
    globs = {
        **globals(),
        **locals(),
        final_output_type.__name__: final_output_type,
    }
    tool_calls = []
    for fn in tools:
        # TODO: escape fn.__doc__ to avoid comment or metadata injection!
        fn_name = fn.__name__
        fn_doc = fn.__doc__.replace('"""', "'''") if fn.__doc__ else None
        # snake_case -> PascalCase for the generated class names.
        name = fn_name.replace('_', ' ').title().replace(' ', '')
        # Fix: emit consistently indented source (4 spaces for class members,
        # 8 for method bodies) — previously member and body lines shared one
        # level, so exec() hit IndentationError on the generated __call__.
        lines += [
            f'class {name}ToolArgs(BaseModel):',
            *(f'    {k}: {type_to_str(v)}' for k, v in fn.__annotations__.items() if k != 'return'),
            f'class {name}Tool(BaseModel):',
            *([f'    """{fn_doc}"""'] if fn_doc else []),
            f'    function: Literal["{fn_name}"]',
            f'    args: {name}ToolArgs',
            f'    def __call__(self) -> {type_to_str(fn.__annotations__.get("return"))}:',
            # Fix: pydantic v2 model_dump() (used elsewhere in this file)
            # instead of the deprecated .dict().
            f'        return {fn_name}(**self.args.model_dump())',
        ]
        tool_calls.append(f'{name}Tool')
    lines += [
        'class FinalResult(BaseModel):',
        f'    final_answer: {type_to_str(final_output_type)}',
        'class Response(BaseModel):',
        f'    """A response that starts with a thought about whether we need tools or not, the plan about tool usage (maybe a sequence of tool calls), and then either a final result (of type {final_output_type.__name__}) or a first tool call"""',
        f'    original_goal: str',
        f'    thought: str',
        f'    next_step: Union[FinalResult, {", ".join(tool_calls)}]',
        f'response_adapter = TypeAdapter(Response)'
    ]
    exec('\n'.join(lines), globs)
    return globs['response_adapter']
if True:
    def create_completion(*, response_model=None, tools=[], endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
        '''
        Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
        (llama.cpp server, llama-cpp-python, Anyscale / Together...)
        The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
        '''
        # Fix: both names below are referenced unconditionally further down,
        # but were only bound inside `if response_model:` — calling without a
        # response_model raised NameError.
        type_adapter = None
        response_format = None
        if response_model:
            if isinstance(response_model, TypeAdapter):
                type_adapter = response_model
            else:
                type_adapter = TypeAdapter(response_model)
            schema = type_adapter.json_schema()
            # Constrain the output via both the system prompt and the
            # server-side response_format JSON-schema support.
            messages = [{
                "role": "system",
                "content": f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
            }] + messages
            response_format = {"type": "json_object", "schema": schema}
        data = requests.post(endpoint, headers={"Content-Type": "application/json"},
                             json=dict(messages=messages, tools=tools, response_format=response_format, **kwargs)).json()
        if 'error' in data:
            raise Exception(data['error']['message'])
        content = data["choices"][0]["message"]["content"]
        print(json.dumps(json.loads(content), indent=2))
        # Parse into the requested model when one was given, else return raw text.
        return type_adapter.validate_json(content) if type_adapter else content
else:
    # This alternative branch uses Instructor + OpenAI client lib.
    # Instructor support streamed iterable responses, retry & more.
    # (see https://python.useinstructor.com/)
    #! pip install instructor openai
    import instructor, openai
    client = instructor.patch(
        openai.OpenAI(api_key="123", base_url="http://localhost:8080"),
        mode=instructor.Mode.JSON_SCHEMA)
    create_completion = client.chat.completions.create
def patch_tools_support(create_completion):
    """Wrap create_completion with a tool-calling loop: each structured
    Response from the model either names the next tool to call (which is
    executed and fed back as a message) or carries the final typed answer."""
    def create_completion_with_tools(*, messages, max_tool_calls=None, response_model=None, tools=[], stream=None, **kwargs):
        if tools:
            assert response_model is not None, "Cannot use tools without response_model"
            assert stream is None, "stream is not supported with tools"
            messages = [{
                "role": "system",
                "content": '\n'.join([
                    f"You're a reliable assistant. You think step by step and use tools",
                ])
            }] + messages
            streamed_tool_response_model = build_tool_call_adapter(response_model, *tools)
            i = 0
            while (max_tool_calls is None or i < max_tool_calls):
                response = create_completion(messages=messages, response_model=streamed_tool_response_model, **kwargs)
                # Fix: the generated Response model declares a `thought` field
                # (`thought_process` is commented out in the generator), so
                # reading `response.thought_process` raised AttributeError.
                print(f'π {response.thought}')
                if callable(response.next_step):
                    # Tool call step: execute it and feed the result back.
                    sys.stdout.write(f'βοΈ {response.next_step.function}(args={response.next_step.args.model_dump()})' )
                    result = response.next_step()
                    result_str = json.dumps(result.model_dump(), indent=2) if isinstance(result, BaseModel) else str(result)
                    sys.stdout.write(f'-> {result_str}\n')
                    messages = messages + [{
                        "role": "assistant",
                        "content": json.dumps(response.model_dump(), indent=2),
                    }, {
                        "role": "user",
                        "content": f"Tool {response.next_step.function} response: {result_str}"
                    }]
                    i += 1
                else:
                    # FinalResult reached: unwrap and return the typed answer.
                    return response.next_step.final_answer
            if max_tool_calls is not None:
                raise Exception(f"Failed to get a valid response after {max_tool_calls} tool calls")
        return create_completion(messages=messages, response_model=response_model, **kwargs)
    return create_completion_with_tools
create_completion = patch_tools_support(create_completion)
if __name__ == '__main__':
    # Response models for the (commented-out) pyramidal-summary demo below.
    class QAPair(BaseModel):
        question: str
        concise_answer: str
        justification: str
    class PyramidalSummary(BaseModel):
        title: str
        summary: str
        # MinLen(2): require at least two Q&A pairs per section.
        question_answers: Annotated[List[QAPair], MinLen(2)]
        # Recursive sub-sections; when present, at least two of them.
        sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]
    # print("# Summary\n", create_completion(
    #     model="...",
    #     response_model=PyramidalSummary,
    #     messages=[{
    #         "role": "user",
    #         "content": f"""
    #             You are a highly efficient corporate document summarizer.
    #             Create a pyramidal summary of an imaginary internal document about our company processes
    #             (starting high-level, going down to each sub sections).
    #             Keep questions short, and answers even shorter (trivia / quizz style).
    #         """
    #     }]))
import math
def eval_python_expression(expr: str) -> float:
    """
    Evaluate a Python expression reliably.
    This can be used to compute complex nested mathematical expressions, or any python, really.
    """
    # Stub: deliberately does NOT eval() the (untrusted, model-produced)
    # expression; it only logs it.
    print("# Evaluating expression: ", expr)
    # Fix: return a float as the annotation declares, not the string "0.0".
    return 0.0
def add(a: float, b: float) -> float:
    """
    Add two numbers reliably.
    Don't use this tool to compute the square of a number (use multiply or pow instead)
    """
    return a + b
def multiply(a: float, b: float) -> float:
    """Multiply a with b numbers reliably"""
    return a * b
def divide(a: float, b: float) -> float:
    """Divide a by b two numbers reliably"""
    return a / b
def pow(value: float, power: float) -> float:
    """
    Raise a value to a power (exponent) reliably.
    The square of x is pow(x, 2), its cube is pow(x, 3), etc.
    """
    # Intentionally shadows builtins.pow: the tool is advertised by this name.
    return math.pow(value, power)
# Drive the agent loop: the model should chain pow/add/multiply/divide tool
# calls and finally return a plain float answer.
result = create_completion(
    model="...",
    response_model=float,
    tools=[add, multiply, divide, pow], #, say],#, eval_python_expression],
    temperature=0.0,
    # logit_bias={
    #     i: 10.0
    #     for i in range(1, 259)
    # },
    messages=[{
        "role": "user",
        # "content": f"""
        #     What is 10 squared?
        # """
        "content": f"""
            What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?
        """
        # Think step by step, start expressing the problem as an arithmetic expression
    }])
# Sample transcript from a successful run:
# π First, I need to square the number 2535. For this, I will use the 'pow' tool.
# βοΈ pow(args={'value': 2535.0, 'power': 2.0})-> 6426225.0
# π Now that I have the square of 2535, I need to add it to 32222000403.0 and store the result.
# βοΈ add(args={'a': 6426225.0, 'b': 32222000403.0})-> 32228426628.0
# π Now that I have the sum of 2535 squared and 32222000403, I need to multiply it by 1.5.
# βοΈ pow(args={'value': 32228426628.0, 'power': 1.5})-> 5785736571757004.0
# π Now that I have the result of the sum multiplied by 1.5, I need to divide it by 3 to get a third of the result.
# βοΈ divide(args={'a': 5785736571757004.0, 'b': 3.0})-> 1928578857252334.8
# π I have now calculated a third of the result, which is 1928578857252334.8. I can now share this as the final answer.
# Result: 1928578857252334.8
expected_result = (2535 ** 2 + 32222000403) * 1.5 / 3.0
print("β‘οΈ", result)
# NOTE(review): an absolute tolerance of 1e-4 is very tight for values around
# 1.9e15 — float rounding at that magnitude can exceed it; confirm intended.
assert math.fabs(result - expected_result) < 0.0001, f"Expected {expected_result}, got {result}"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Usage: | |
| #! ./server -m some-model.gguf & | |
| #! pip install pydantic | |
| #! python examples/json-schema-pydantic-example.py | |
| # | |
| # TODO: | |
| # - https://github.com/NousResearch/Hermes-Function-Calling | |
| # | |
| # <|im_start|>system | |
| # You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags | |
| # You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: | |
| # <tools> {'type': 'function', 'function': {'name': 'get_stock_fundamentals', | |
| # 'description': 'get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\n\n Args:\n symbol (str): The stock symbol.\n\n Returns:\n dict: A dictionary containing fundamental data.', 'parameters': {'type': 'object', 'properties': {'symbol': {'type': 'string'}}, 'required': ['symbol']}}} | |
| # </tools> Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows: | |
| # <tool_call> | |
| # {'arguments': <args-dict>, 'name': <function-name>} | |
| # </tool_call><|im_end|> | |
| from dataclasses import dataclass | |
| import subprocess | |
| import sys | |
| from pydantic import BaseModel, TypeAdapter | |
| from annotated_types import MinLen | |
| from typing import Annotated, Callable, List, Union, Literal, Optional, Type, get_args, get_origin | |
| import json, requests | |
def type_to_str(t):
    """Turn a type annotation into its Python-source spelling, recursively."""
    origin = get_origin(t)
    if origin is None:
        return t.__name__
    parameters = get_args(t)
    suffix = f'[{", ".join(type_to_str(p) for p in parameters)}]' if parameters else ''
    return origin.__name__ + suffix
def build_union_type_adapter(*types):
    """Build a pydantic TypeAdapter over Union[*types] by exec-ing generated
    source, so the type names resolve against this module's globals."""
    union_members = ", ".join(type_to_str(t) for t in types)
    source = '\n'.join([
        'from pydantic import TypeAdapter',
        'from typing import Union',
        f'_out = TypeAdapter(Union[{union_members}])',
    ])
    namespace = {**globals(), **{t.__name__: t for t in types}}
    exec(source, namespace)
    return namespace['_out']
| # def build_tool_call_adapter(final_output_type, *tools): | |
| # lines = [ | |
| # 'from pydantic import BaseModel, TypeAdapter', | |
| # 'from typing import Literal, Union', | |
| # ] | |
| # globs = { | |
| # **globals(), | |
| # **locals(), | |
| # final_output_type.__name__: final_output_type, | |
| # } | |
| # tool_calls = [] | |
| # for fn in tools: | |
| # #Β TODO: escape fn.__doc__ and fn.__doc__ to avoid comment or metadata injection! | |
| # fn_name = fn.__name__ | |
| # fn_doc = fn.__doc__.replace('"""', "'''") if fn.__doc__ else None | |
| # name = fn_name.replace('_', ' ').title().replace(' ', '') | |
| # lines += [ | |
| # f'class {name}ToolArgs(BaseModel):', | |
| # *(f' {k}: {type_to_str(v)}' for k, v in fn.__annotations__.items() if k != 'return'), | |
| # f'class {name}Tool(BaseModel):', | |
| # *([f' """{fn_doc}"""'] if fn_doc else []), | |
| # f' name: Literal["{fn_name}"]', | |
| # f' arguments: {name}ToolArgs', | |
| # f' def __call__(self) -> {type_to_str(fn.__annotations__.get("return"))}:', | |
| # f' return {fn_name}(**self.arguments.dict())', | |
| # ] | |
| # tool_calls.append(f'{name}Tool') | |
| # lines += [ | |
| # 'class FinalResult(BaseModel):', | |
| # f' result: {type_to_str(final_output_type)}', | |
| # 'class Response(BaseModel):', | |
| # f' """A response that starts with a thought about whether we need tools or not, the plan about tool usage (maybe a sequence of tool calls), and then either a final result (of type {final_output_type.__name__}) or a first tool call"""', | |
| # f' original_goal: str', | |
| # f' thought_process: str', | |
| # # f' thought: str', | |
| # f' next_step: Union[FinalResult, {", ".join(tool_calls)}]', | |
| # f'response_adapter = TypeAdapter(Response)' | |
| # ] | |
| # exec('\n'.join(lines), globs) | |
| # return globs['response_adapter'] | |
| # if True: | |
| # def create_completion(*, response_model=None, tool_call_adapter=None, endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs): | |
| # ''' | |
| # Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support | |
| # (llama.cpp server, llama-cpp-python, Anyscale / Together...) | |
| # The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below) | |
| # ''' | |
| # if response_model: | |
| # if isinstance(response_model, TypeAdapter): | |
| # type_adapter = response_model | |
| # else: | |
| # type_adapter = TypeAdapter(response_model) | |
| # schema = type_adapter.json_schema() | |
| # messages = [{ | |
| # "role": "system", | |
| # "content": f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}" | |
| # }] + messages | |
| # # print("Completion: ", json.dumps(messages, indent=2)) | |
| # # print("SCHEMA: " + json.dumps(schema, indent=2)) | |
| # response_format={"type": "json_object", "schema": schema } | |
| # data = requests.post(endpoint, headers={"Content-Type": "application/json"}, | |
| # json=dict(messages=messages, response_format=response_format, **kwargs)).json() | |
| # if 'error' in data: | |
| # raise Exception(data['error']['message']) | |
| # choice = data["choices"][0] | |
| # # if choice.get("finish_reason") == "tool_calls": | |
| # # tool_calls = [] | |
| # # [tool_call_adapter.validate_json(tool_call) for tool_call in choice["tool_calls"]] | |
| # # print(json.dumps(data, indent=2)) | |
| # content = choice["message"]["content"] | |
| # # print(content) | |
| # print(json.dumps(json.loads(content), indent=2)) | |
| # return type_adapter.validate_json(content) if type_adapter else content | |
| # else: | |
| # # This alternative branch uses Instructor + OpenAI client lib. | |
| # # Instructor support streamed iterable responses, retry & more. | |
| # # (see https://python.useinstructor.com/) | |
| # #! pip install instructor openai | |
| # import instructor, openai | |
| # client = instructor.patch( | |
| # openai.OpenAI(api_key="123", base_url="http://localhost:8080"), | |
| # mode=instructor.Mode.JSON_SCHEMA) | |
| # create_completion = client.chat.completions.create | |
| # def patch_tools_support_pydantic(create_completion): | |
| # def create_completion_with_tools(*, messages, max_tool_iterations=None, response_model=None, tools=[], stream=None, **kwargs): | |
| # if tools: | |
| # assert response_model is not None, "Cannot use tools without response_model" | |
| # assert stream is None, "stream is not supported with tools" | |
| # messages = [{ | |
| # "role": "system", | |
| # # You're a reliable assistant. You systematically use tools instead of doing the math yourself / manually / mentally. | |
| # "content": '\n'.join([ | |
| # # "You're a reliable assistant. You process requests step by step, using tools instead of doing things yourself / manually / mentally.", | |
| # # "You're a reliable assistant. You systematically use tools instead of doing things yourself / manually / mentally.", | |
| # # "At every step, you decide whether you need to use a tool, and say which one in your thoughts.", | |
| # # "At every step, think about which tool(s) you need to use, if any (one of {', '.join(fn.__name__ for fn in tools)}), then call the next tool. When you're done, give the final result.", | |
| # # "You're a reliable assistant. You systematically use available tools instead of doing things by yourself / manually / mentally.", | |
| # f"You're a reliable assistant. You think step by step and use tools",# and can use the following tools: {', '.join(fn.__name__ for fn in tools)}.", | |
| # # "You're a reliable assistant. You pick the most appropriate tool at each step instead of doing anything yourself / manually / mentally.", | |
| # #"think about the name of the function of the next tool you need to use, if any (one of {', '.join(fn.__name__ for fn in tools)}), then call the next tool (or give the final result directly if no tool call is needed).", | |
| # # "Think about the tool to call in the next step, if any (one of {', '.join(fn.__name__ for fn in tools)}) (give the final result directly if available).", | |
| # # f"Think about which tool is needed for each step, if any (one of {', '.join(fn.__name__ for fn in tools)}).", | |
| # # f'Think about which of the following tools is needed for each step, if any:', | |
| # # *(f'- {fn.__name__}: {fn.__doc__}' for fn in tools), | |
| # # f'You can use any of the following tools:', | |
| # # *(f'- {fn.__name__}: {fn.__doc__}' for fn in tools), | |
| # # f"You think out loud about the name of the tool function you need to use for each step of the process, if any (one of {', '.join(fn.__name__ for fn in tools)})", | |
| # # f'You think about which tool you need to use for the next step', | |
| # # f"Think about which tool is needed for each step, and why it's appropriate (compare to other tools if needed). Don't forget to describe the values of the arguments you'll use.", | |
| # # f"Think about which tool is needed for each step, and why it's appropriate (compare to other tools if needed). Don't forget to describe the values of the arguments you'll use.", | |
| # ]) | |
| # }] + messages | |
| # streamed_tool_response_model = build_tool_call_adapter(response_model, *tools) | |
| # i = 0 | |
| # while (max_tool_iterations is None or i < max_tool_iterations): | |
| # # print("Completion: ", json.dumps(messages, indent=2)) | |
| # response = create_completion(messages=messages, response_model=streamed_tool_response_model, **kwargs) | |
| # # print("Got response: ", json.dumps(response.model_dump(), indent=2)) | |
| # # response = response.next_step | |
| # # print(f'(Original goal: {response.original_goal})') | |
| # print(f'π {response.thought_process}') | |
| # if callable(response.next_step): | |
| # sys.stdout.write(f'βοΈ {response.next_step.function}(args={response.next_step.args.model_dump()})' ) | |
| # result = response.next_step() | |
| # result_str = json.dumps(result.model_dump(), indent=2) if isinstance(result, BaseModel) else str(result) | |
| # sys.stdout.write(f'-> {result_str}\n') | |
| # # tool_response = f"Tool response: {result_str}" | |
| # # print(tool_response) | |
| # messages = messages + [{ | |
| # "role": "assistant", | |
| # "content": json.dumps(response.model_dump(), indent=2), | |
| # }, { | |
| # "role": "user", | |
| # "content": f"Tool {response.next_step.function} response: {result_str}" | |
| # }] | |
| # i += 1 | |
| # else: | |
| # return response.next_step.result | |
| # if max_tool_iterations is not None: | |
| # raise Exception(f"Failed to get a valid response after {max_tool_iterations} tool calls") | |
| # return create_completion(messages=messages, response_model=response_model, **kwargs) | |
| # return create_completion_with_tools | |
# Wrapper model that lets the LLM reply with an intermediate free-form thought
# instead of the final typed answer (create_completion2 unions it with the
# caller's response_model and loops while Thoughts keep coming back).
class Thought(BaseModel):
    thought: str
def build_tool_call_adapter2(final_output_type, *tools):
    """Build a TypeAdapter over the union of OpenAI-style tool-call models
    (id / type / function) generated from the given tool functions; each
    parsed tool call is callable and dispatches to the underlying function.

    NOTE(review): the model source is assembled from fn.__doc__ and
    annotations and run through exec() — a hostile docstring could inject
    code (see TODO below).
    """
    lines = [
        'from pydantic import BaseModel, TypeAdapter',
        'from typing import Literal, Union',
    ]
    # exec namespace: expose the final output type and (via locals()) the tools.
    globs = {
        **globals(),
        **locals(),
        final_output_type.__name__: final_output_type,
    }
    tool_calls = []
    for fn in tools:
        # TODO: escape fn.__doc__ to avoid comment or metadata injection!
        fn_name = fn.__name__
        fn_doc = fn.__doc__.replace('"""', "'''") if fn.__doc__ else None
        # snake_case -> PascalCase for the generated class names.
        name = fn_name.replace('_', ' ').title().replace(' ', '')
        # Fix: emit consistently indented source (4 spaces for class members,
        # 8 for method bodies) — previously member and body lines shared one
        # level, so exec() hit IndentationError on the generated __call__.
        lines += [
            f'class {name}ToolArgs(BaseModel):',
            *(f'    {k}: {type_to_str(v)}' for k, v in fn.__annotations__.items() if k != 'return'),
            f'class {name}ToolCall(BaseModel):',
            *([f'    """{fn_doc}"""'] if fn_doc else []),
            f'    name: Literal["{fn_name}"]',
            f'    arguments: {name}ToolArgs',
            f'class {name}Tool(BaseModel):',
            f'    id: str',
            f'    type: Literal["function"]',
            f'    function: {name}ToolCall',
            f'    def __call__(self) -> {type_to_str(fn.__annotations__.get("return"))}:',
            # Fix: pydantic v2 model_dump() (used elsewhere in this file)
            # instead of the deprecated .dict().
            f'        return {fn_name}(**self.function.arguments.model_dump())',
        ]
        tool_calls.append(f'{name}Tool')
    lines += [
        f'response_adapter = TypeAdapter(Union[{", ".join(tool_calls)}])',
    ]
    exec('\n'.join(lines), globs)
    return globs['response_adapter']
def create_completion2(*, response_model=None, tool_call_adapter=None, max_tool_iterations=None, tools=[], endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
    '''
    Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
    (llama.cpp server, llama-cpp-python, Anyscale / Together...)
    The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
    '''
    # NOTE(review): when response_model is falsy this function is a no-op and
    # implicitly returns None — confirm that is intended.
    if response_model:
        # The model may answer with an intermediate Thought or the final type.
        type_adapter = build_union_type_adapter(response_model, Thought)
        schema = type_adapter.json_schema()
        response_format = {"type": "json_object", "schema": schema}
        tool_call_adapter = build_tool_call_adapter2(response_model, *tools)
        tool_adapters = [(fn, TypeAdapter(fn)) for fn in tools]
        # OpenAI-style tool schemas derived from each function's signature/doc.
        tools_schemas = [{
            "type": "function",
            "function": {
                "name": fn.__name__,
                "description": fn.__doc__,
                "parameters": ta.json_schema()
            }
        } for (fn, ta) in tool_adapters]
        messages = [{
            "role": "system",
            "content": '\n'.join([
                'Before calling each tool, you think clearly and briefly about why and how you are using the tool.',
            ])
        }] + messages
        i = 0
        while (max_tool_iterations is None or i < max_tool_iterations):
            body = dict(
                messages=messages,
                response_format=response_format,
                tools=tools_schemas,
                **kwargs
            )
            data = requests.post(
                endpoint,
                headers={"Content-Type": "application/json"},
                json=body,
            ).json()
            if 'error' in data:
                raise Exception(data['error']['message'])
            choice = data["choices"][0]
            if choice.get("finish_reason") == "tool_calls":
                # Fix: in the OpenAI chat-completions format tool calls live on
                # the assistant *message* (choice["message"]["tool_calls"]),
                # not on the choice itself — the old lookup raised KeyError.
                for tool_call in choice["message"]["tool_calls"]:
                    tc = tool_call_adapter.validate_json(json.dumps(tool_call))
                    sys.stdout.write(f'βοΈ {tc.function.name}({", ".join(f"{k}={v}" for k, v in tc.function.arguments.model_dump().items())})' )
                    result = tc()
                    sys.stdout.write(f" -> {result}\n")
                    # NOTE(review): the OpenAI protocol also expects the
                    # assistant message carrying the tool_calls to be appended
                    # before these "tool" responses — confirm the server
                    # tolerates its absence.
                    messages.append({
                        "tool_call_id": tc.id,
                        "role": "tool",
                        "name": tc.function.name,
                        "content": f'{result}',
                    })
            else:
                content = choice["message"]["content"]
                result = type_adapter.validate_json(content) if type_adapter else content
                if isinstance(result, Thought):
                    # Intermediate reasoning step: echo it, record it, loop on.
                    print(f'π {result.thought}')
                    messages.append({
                        "role": "assistant",
                        "content": json.dumps(result.model_dump(), indent=2),
                    })
                else:
                    return result
            i += 1
        if max_tool_iterations is not None:
            raise Exception(f"Failed to get a valid response after {max_tool_iterations} tool calls")
def patch_tools_support_openai(create_completion):
    """Wrap create_completion for tool usage; currently a stub that only
    validates the tool-related preconditions and forwards the call (the
    tools themselves are not passed through)."""
    def create_completion_with_tools(*, messages, max_tool_iterations=None, response_model=None, tools=[], stream=None, **kwargs):
        if tools:
            assert response_model is not None, "Cannot use tools without response_model"
            assert stream is None, "stream is not supported with tools"
        # Forward without tools / stream / max_tool_iterations (consumed here).
        return create_completion(messages=messages, response_model=response_model, **kwargs)
    return create_completion_with_tools
| # create_completion = patch_tools_support_pydantic(create_completion) | |
| # create_completion = patch_tools_support_openai(create_completion) | |
if __name__ == '__main__':

    class QAPair(BaseModel):
        # One trivia-style question/answer entry of a summary section.
        question: str
        concise_answer: str
        justification: str

    class PyramidalSummary(BaseModel):
        # Recursive top-down summary: title + summary + at least 2 Q&A pairs,
        # with optional nested sub-sections of the same shape.
        title: str
        summary: str
        question_answers: Annotated[List[QAPair], MinLen(2)]
        sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]

    # Example (structured output, no tools):
    # print("# Summary\n", create_completion(
    #     model="...",
    #     response_model=PyramidalSummary,
    #     messages=[{
    #         "role": "user",
    #         "content": "You are a highly efficient corporate document summarizer. "
    #                    "Create a pyramidal summary of an imaginary internal document about our company processes "
    #                    "(starting high-level, going down to each sub sections). "
    #                    "Keep questions short, and answers even shorter (trivia / quizz style)."
    #     }]))

    import math

    def eval_python_expression(expr: str) -> float:
        """
        Evaluate a Python expression reliably.
        This can be used to compute complex nested mathematical expressions, or any python, really.
        """
        # Stub: deliberately does NOT eval() the model-supplied expression
        # (that would be unsafe). Returns a float to match the annotation
        # (was the string "0.0"). Currently commented out of the tools list.
        print("# Evaluating expression: ", expr)
        return 0.0

    def add(a: float, b: float) -> float:
        """
        Add two numbers reliably.
        Don't use this tool to compute the square of a number (use multiply or pow instead)
        """
        return a + b

    # def say(something: str) -> str:
    #     """
    #     Just says something. Used to say each thought out loud
    #     """
    #     return subprocess.check_call(["say", something])

    def multiply(a: float, b: float) -> float:
        """Multiply a with b numbers reliably"""
        return a * b

    def divide(a: float, b: float) -> float:
        """Divide a by b two numbers reliably"""
        return a / b

    # NOTE(review): shadows the builtin `pow` — kept, since the function name
    # is exposed to the model as the tool name (and referenced in the trace below).
    def pow(value: float, power: float) -> float:
        """
        Raise a value to a power (exponent) reliably.
        The square of x is pow(x, 2), its cube is pow(x, 3), etc.
        """
        return math.pow(value, power)

    result = create_completion2(
        model="...",
        response_model=str,
        tools=[add, multiply, divide, pow],  # , say], #, eval_python_expression],
        temperature=0.0,  # deterministic decoding → reproducible trace
        messages=[{
            "role": "system",
            "content": """
            You are a reliable assistant. You think step by step and think before using tools
            """
        }, {
            "role": "user",
            "content": """
            What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?
            """
        }])

    # Expected trace from a previous run:
    # 💭 First, I need to square the number 2535. For this, I will use the 'pow' tool.
    # ⚙️ pow(args={'value': 2535.0, 'power': 2.0})-> 6426225.0
    # 💭 Now that I have the square of 2535, I need to add it to 32222000403.0 and store the result.
    # ⚙️ add(args={'a': 6426225.0, 'b': 32222000403.0})-> 32228426628.0
    # 💭 Now that I have the sum of 2535 squared and 32222000403, I need to multiply it by 1.5.
    # ⚙️ pow(args={'value': 32228426628.0, 'power': 1.5})-> 5785736571757004.0
    # 💭 Now that I have the result of the sum multiplied by 1.5, I need to divide it by 3 to get a third of the result.
    # ⚙️ divide(args={'a': 5785736571757004.0, 'b': 3.0})-> 1928578857252334.8
    # 💭 I have now calculated a third of the result, which is 1928578857252334.8. I can now share this as the final answer.
    # Result: 1928578857252334.8

    expected_result = (2535 ** 2 + 32222000403) * 1.5 / 3.0
    print("➡️", result)
    # response_model=str means the answer may come back as a string — coerce it
    # before the numeric comparison (the original subtracted a str from a float,
    # which raises TypeError). float() is a no-op if `result` is already numeric.
    assert math.fabs(float(result) - expected_result) < 0.0001, f"Expected {expected_result}, got {result}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment