Gist ochafik/1c1dc3db94381bc72ffda78aba27e2c5 — save it to your computer and use it in GitHub Desktop.
llama.cpp tool usage / agent example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Usage: | |
| #! ./server -m some-model.gguf & | |
| #! pip install pydantic | |
| #! python examples/json-schema-pydantic-example.py | |
| # | |
| # TODO: | |
| # - https://github.com/NousResearch/Hermes-Function-Calling | |
| # | |
| # <|im_start|>system | |
| # You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags | |
| # You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: | |
| # <tools> {'type': 'function', 'function': {'name': 'get_stock_fundamentals', | |
| # 'description': 'get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\n\n Args:\n symbol (str): The stock symbol.\n\n Returns:\n dict: A dictionary containing fundamental data.', 'parameters': {'type': 'object', 'properties': {'symbol': {'type': 'string'}}, 'required': ['symbol']}}} | |
| # </tools> Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows: | |
| # <tool_call> | |
| # {'arguments': <args-dict>, 'name': <function-name>} | |
| # </tool_call><|im_end|> | |
| from dataclasses import dataclass | |
| import subprocess | |
| import sys | |
| from pydantic import BaseModel, TypeAdapter | |
| from annotated_types import MinLen | |
| from typing import Annotated, Callable, List, Union, Literal, Optional, Type, get_args, get_origin | |
| import json, requests | |
def type_to_str(t):
    """Render a type annotation as Python source text (e.g. 'list[int]')."""
    origin = get_origin(t)
    if origin is None:
        # Plain class (int, float, a pydantic model, ...): just its name.
        return t.__name__
    type_args = get_args(t)
    if not type_args:
        return origin.__name__
    rendered_args = ", ".join(type_to_str(arg) for arg in type_args)
    return f"{origin.__name__}[{rendered_args}]"
def build_tool_call_adapter(final_output_type, *tools):
    """Build a pydantic TypeAdapter for a generated `Response` model whose
    `next_step` is either a `FinalResult` wrapping final_output_type or one of
    the tool-call models derived from the given tool functions.

    NOTE(review): the model source is assembled from fn.__doc__ and
    annotations and run through exec() — a hostile docstring could inject
    code (see TODO below).
    """
    lines = [
        'from pydantic import BaseModel, TypeAdapter',
        'from typing import Literal, Union',
    ]
    # exec namespace: expose the final output type and (via locals()) the tools.
    globs = {
        **globals(),
        **locals(),
        final_output_type.__name__: final_output_type,
    }
    tool_calls = []
    for fn in tools:
        # TODO: escape fn.__doc__ to avoid comment or metadata injection!
        fn_name = fn.__name__
        fn_doc = fn.__doc__.replace('"""', "'''") if fn.__doc__ else None
        # snake_case -> PascalCase for the generated class names.
        name = fn_name.replace('_', ' ').title().replace(' ', '')
        # Fix: emit consistently indented source (4 spaces for class members,
        # 8 for method bodies) — previously member and body lines shared one
        # level, so exec() hit IndentationError on the generated __call__.
        lines += [
            f'class {name}ToolArgs(BaseModel):',
            *(f'    {k}: {type_to_str(v)}' for k, v in fn.__annotations__.items() if k != 'return'),
            f'class {name}Tool(BaseModel):',
            *([f'    """{fn_doc}"""'] if fn_doc else []),
            f'    function: Literal["{fn_name}"]',
            f'    args: {name}ToolArgs',
            f'    def __call__(self) -> {type_to_str(fn.__annotations__.get("return"))}:',
            # Fix: pydantic v2 model_dump() (used elsewhere in this file)
            # instead of the deprecated .dict().
            f'        return {fn_name}(**self.args.model_dump())',
        ]
        tool_calls.append(f'{name}Tool')
    lines += [
        'class FinalResult(BaseModel):',
        f'    final_answer: {type_to_str(final_output_type)}',
        'class Response(BaseModel):',
        f'    """A response that starts with a thought about whether we need tools or not, the plan about tool usage (maybe a sequence of tool calls), and then either a final result (of type {final_output_type.__name__}) or a first tool call"""',
        f'    original_goal: str',
        f'    thought: str',
        f'    next_step: Union[FinalResult, {", ".join(tool_calls)}]',
        f'response_adapter = TypeAdapter(Response)'
    ]
    exec('\n'.join(lines), globs)
    return globs['response_adapter']
if True:
    def create_completion(*, response_model=None, tools=[], endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
        '''
        Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
        (llama.cpp server, llama-cpp-python, Anyscale / Together...)
        The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
        '''
        # Fix: both names below are referenced unconditionally further down,
        # but were only bound inside `if response_model:` — calling without a
        # response_model raised NameError.
        type_adapter = None
        response_format = None
        if response_model:
            if isinstance(response_model, TypeAdapter):
                type_adapter = response_model
            else:
                type_adapter = TypeAdapter(response_model)
            schema = type_adapter.json_schema()
            # Constrain the output via both the system prompt and the
            # server-side response_format JSON-schema support.
            messages = [{
                "role": "system",
                "content": f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
            }] + messages
            response_format = {"type": "json_object", "schema": schema}
        data = requests.post(endpoint, headers={"Content-Type": "application/json"},
                             json=dict(messages=messages, tools=tools, response_format=response_format, **kwargs)).json()
        if 'error' in data:
            raise Exception(data['error']['message'])
        content = data["choices"][0]["message"]["content"]
        print(json.dumps(json.loads(content), indent=2))
        # Parse into the requested model when one was given, else return raw text.
        return type_adapter.validate_json(content) if type_adapter else content
else:
    # This alternative branch uses Instructor + OpenAI client lib.
    # Instructor support streamed iterable responses, retry & more.
    # (see https://python.useinstructor.com/)
    #! pip install instructor openai
    import instructor, openai
    client = instructor.patch(
        openai.OpenAI(api_key="123", base_url="http://localhost:8080"),
        mode=instructor.Mode.JSON_SCHEMA)
    create_completion = client.chat.completions.create
def patch_tools_support(create_completion):
    """Wrap create_completion with a tool-calling loop: each structured
    Response from the model either names the next tool to call (which is
    executed and fed back as a message) or carries the final typed answer."""
    def create_completion_with_tools(*, messages, max_tool_calls=None, response_model=None, tools=[], stream=None, **kwargs):
        if tools:
            assert response_model is not None, "Cannot use tools without response_model"
            assert stream is None, "stream is not supported with tools"
            messages = [{
                "role": "system",
                "content": '\n'.join([
                    f"You're a reliable assistant. You think step by step and use tools",
                ])
            }] + messages
            streamed_tool_response_model = build_tool_call_adapter(response_model, *tools)
            i = 0
            while (max_tool_calls is None or i < max_tool_calls):
                response = create_completion(messages=messages, response_model=streamed_tool_response_model, **kwargs)
                # Fix: the generated Response model declares a `thought` field
                # (`thought_process` is commented out in the generator), so
                # reading `response.thought_process` raised AttributeError.
                print(f'π {response.thought}')
                if callable(response.next_step):
                    # Tool call step: execute it and feed the result back.
                    sys.stdout.write(f'βοΈ {response.next_step.function}(args={response.next_step.args.model_dump()})' )
                    result = response.next_step()
                    result_str = json.dumps(result.model_dump(), indent=2) if isinstance(result, BaseModel) else str(result)
                    sys.stdout.write(f'-> {result_str}\n')
                    messages = messages + [{
                        "role": "assistant",
                        "content": json.dumps(response.model_dump(), indent=2),
                    }, {
                        "role": "user",
                        "content": f"Tool {response.next_step.function} response: {result_str}"
                    }]
                    i += 1
                else:
                    # FinalResult reached: unwrap and return the typed answer.
                    return response.next_step.final_answer
            if max_tool_calls is not None:
                raise Exception(f"Failed to get a valid response after {max_tool_calls} tool calls")
        return create_completion(messages=messages, response_model=response_model, **kwargs)
    return create_completion_with_tools
create_completion = patch_tools_support(create_completion)
if __name__ == '__main__':
    # Response models for the (commented-out) pyramidal-summary demo below.
    class QAPair(BaseModel):
        question: str
        concise_answer: str
        justification: str
    class PyramidalSummary(BaseModel):
        title: str
        summary: str
        # MinLen(2): require at least two Q&A pairs per section.
        question_answers: Annotated[List[QAPair], MinLen(2)]
        # Recursive sub-sections; when present, at least two of them.
        sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]
    # print("# Summary\n", create_completion(
    #     model="...",
    #     response_model=PyramidalSummary,
    #     messages=[{
    #         "role": "user",
    #         "content": f"""
    #             You are a highly efficient corporate document summarizer.
    #             Create a pyramidal summary of an imaginary internal document about our company processes
    #             (starting high-level, going down to each sub sections).
    #             Keep questions short, and answers even shorter (trivia / quizz style).
    #         """
    #     }]))
import math
def eval_python_expression(expr: str) -> float:
    """
    Evaluate a Python expression reliably.
    This can be used to compute complex nested mathematical expressions, or any python, really.
    """
    # Stub: deliberately does NOT eval() the (untrusted, model-produced)
    # expression; it only logs it.
    print("# Evaluating expression: ", expr)
    # Fix: return a float as the annotation declares, not the string "0.0".
    return 0.0
def add(a: float, b: float) -> float:
    """
    Add two numbers reliably.
    Don't use this tool to compute the square of a number (use multiply or pow instead)
    """
    return a + b
def multiply(a: float, b: float) -> float:
    """Multiply a with b numbers reliably"""
    return a * b
def divide(a: float, b: float) -> float:
    """Divide a by b two numbers reliably"""
    return a / b
def pow(value: float, power: float) -> float:
    """
    Raise a value to a power (exponent) reliably.
    The square of x is pow(x, 2), its cube is pow(x, 3), etc.
    """
    # Intentionally shadows builtins.pow: the tool is advertised by this name.
    return math.pow(value, power)
# Drive the agent loop: the model should chain pow/add/multiply/divide tool
# calls and finally return a plain float answer.
result = create_completion(
    model="...",
    response_model=float,
    tools=[add, multiply, divide, pow], #, say],#, eval_python_expression],
    temperature=0.0,
    # logit_bias={
    #     i: 10.0
    #     for i in range(1, 259)
    # },
    messages=[{
        "role": "user",
        # "content": f"""
        #     What is 10 squared?
        # """
        "content": f"""
            What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?
        """
        # Think step by step, start expressing the problem as an arithmetic expression
    }])
# Sample transcript from a successful run:
# π First, I need to square the number 2535. For this, I will use the 'pow' tool.
# βοΈ pow(args={'value': 2535.0, 'power': 2.0})-> 6426225.0
# π Now that I have the square of 2535, I need to add it to 32222000403.0 and store the result.
# βοΈ add(args={'a': 6426225.0, 'b': 32222000403.0})-> 32228426628.0
# π Now that I have the sum of 2535 squared and 32222000403, I need to multiply it by 1.5.
# βοΈ pow(args={'value': 32228426628.0, 'power': 1.5})-> 5785736571757004.0
# π Now that I have the result of the sum multiplied by 1.5, I need to divide it by 3 to get a third of the result.
# βοΈ divide(args={'a': 5785736571757004.0, 'b': 3.0})-> 1928578857252334.8
# π I have now calculated a third of the result, which is 1928578857252334.8. I can now share this as the final answer.
# Result: 1928578857252334.8
expected_result = (2535 ** 2 + 32222000403) * 1.5 / 3.0
print("β‘οΈ", result)
# NOTE(review): an absolute tolerance of 1e-4 is very tight for values around
# 1.9e15 — float rounding at that magnitude can exceed it; confirm intended.
assert math.fabs(result - expected_result) < 0.0001, f"Expected {expected_result}, got {result}"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Usage: | |
| #! ./server -m some-model.gguf & | |
| #! pip install pydantic | |
| #! python examples/json-schema-pydantic-example.py | |
| # | |
| # TODO: | |
| # - https://github.com/NousResearch/Hermes-Function-Calling | |
| # | |
| # <|im_start|>system | |
| # You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags | |
| # You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: | |
| # <tools> {'type': 'function', 'function': {'name': 'get_stock_fundamentals', | |
| # 'description': 'get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\n\n Args:\n symbol (str): The stock symbol.\n\n Returns:\n dict: A dictionary containing fundamental data.', 'parameters': {'type': 'object', 'properties': {'symbol': {'type': 'string'}}, 'required': ['symbol']}}} | |
| # </tools> Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows: | |
| # <tool_call> | |
| # {'arguments': <args-dict>, 'name': <function-name>} | |
| # </tool_call><|im_end|> | |
| from dataclasses import dataclass | |
| import subprocess | |
| import sys | |
| from pydantic import BaseModel, TypeAdapter | |
| from annotated_types import MinLen | |
| from typing import Annotated, Callable, List, Union, Literal, Optional, Type, get_args, get_origin | |
| import json, requests | |
def type_to_str(t):
    """Turn a type annotation into its Python-source spelling, recursively."""
    origin = get_origin(t)
    if origin is None:
        return t.__name__
    parameters = get_args(t)
    suffix = f'[{", ".join(type_to_str(p) for p in parameters)}]' if parameters else ''
    return origin.__name__ + suffix
def build_union_type_adapter(*types):
    """Build a pydantic TypeAdapter over Union[*types] by exec-ing generated
    source, so the type names resolve against this module's globals."""
    union_members = ", ".join(type_to_str(t) for t in types)
    source = '\n'.join([
        'from pydantic import TypeAdapter',
        'from typing import Union',
        f'_out = TypeAdapter(Union[{union_members}])',
    ])
    namespace = {**globals(), **{t.__name__: t for t in types}}
    exec(source, namespace)
    return namespace['_out']
| # def build_tool_call_adapter(final_output_type, *tools): | |
| # lines = [ | |
| # 'from pydantic import BaseModel, TypeAdapter', | |
| # 'from typing import Literal, Union', | |
| # ] | |
| # globs = { | |
| # **globals(), | |
| # **locals(), | |
| # final_output_type.__name__: final_output_type, | |
| # } | |
| # tool_calls = [] | |
| # for fn in tools: | |
| # #Β TODO: escape fn.__doc__ and fn.__doc__ to avoid comment or metadata injection! | |
| # fn_name = fn.__name__ | |
| # fn_doc = fn.__doc__.replace('"""', "'''") if fn.__doc__ else None | |
| # name = fn_name.replace('_', ' ').title().replace(' ', '') | |
| # lines += [ | |
| # f'class {name}ToolArgs(BaseModel):', | |
| # *(f' {k}: {type_to_str(v)}' for k, v in fn.__annotations__.items() if k != 'return'), | |
| # f'class {name}Tool(BaseModel):', | |
| # *([f' """{fn_doc}"""'] if fn_doc else []), | |
| # f' name: Literal["{fn_name}"]', | |
| # f' arguments: {name}ToolArgs', | |
| # f' def __call__(self) -> {type_to_str(fn.__annotations__.get("return"))}:', | |
| # f' return {fn_name}(**self.arguments.dict())', | |
| # ] | |
| # tool_calls.append(f'{name}Tool') | |
| # lines += [ | |
| # 'class FinalResult(BaseModel):', | |
| # f' result: {type_to_str(final_output_type)}', | |
| # 'class Response(BaseModel):', | |
| # f' """A response that starts with a thought about whether we need tools or not, the plan about tool usage (maybe a sequence of tool calls), and then either a final result (of type {final_output_type.__name__}) or a first tool call"""', | |
| # f' original_goal: str', | |
| # f' thought_process: str', | |
| # # f' thought: str', | |
| # f' next_step: Union[FinalResult, {", ".join(tool_calls)}]', | |
| # f'response_adapter = TypeAdapter(Response)' | |
| # ] | |
| # exec('\n'.join(lines), globs) | |
| # return globs['response_adapter'] | |
| # if True: | |
| # def create_completion(*, response_model=None, tool_call_adapter=None, endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs): | |
| # ''' | |
| # Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support | |
| # (llama.cpp server, llama-cpp-python, Anyscale / Together...) | |
| # The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below) | |
| # ''' | |
| # if response_model: | |
| # if isinstance(response_model, TypeAdapter): | |
| # type_adapter = response_model | |
| # else: | |
| # type_adapter = TypeAdapter(response_model) | |
| # schema = type_adapter.json_schema() | |
| # messages = [{ | |
| # "role": "system", | |
| # "content": f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}" | |
| # }] + messages | |
| # # print("Completion: ", json.dumps(messages, indent=2)) | |
| # # print("SCHEMA: " + json.dumps(schema, indent=2)) | |
| # response_format={"type": "json_object", "schema": schema } | |
| # data = requests.post(endpoint, headers={"Content-Type": "application/json"}, | |
| # json=dict(messages=messages, response_format=response_format, **kwargs)).json() | |
| # if 'error' in data: | |
| # raise Exception(data['error']['message']) | |
| # choice = data["choices"][0] | |
| # # if choice.get("finish_reason") == "tool_calls": | |
| # # tool_calls = [] | |
| # # [tool_call_adapter.validate_json(tool_call) for tool_call in choice["tool_calls"]] | |
| # # print(json.dumps(data, indent=2)) | |
| # content = choice["message"]["content"] | |
| # # print(content) | |
| # print(json.dumps(json.loads(content), indent=2)) | |
| # return type_adapter.validate_json(content) if type_adapter else content | |
| # else: | |
| # # This alternative branch uses Instructor + OpenAI client lib. | |
| # # Instructor support streamed iterable responses, retry & more. | |
| # # (see https://python.useinstructor.com/) | |
| # #! pip install instructor openai | |
| # import instructor, openai | |
| # client = instructor.patch( | |
| # openai.OpenAI(api_key="123", base_url="http://localhost:8080"), | |
| # mode=instructor.Mode.JSON_SCHEMA) | |
| # create_completion = client.chat.completions.create | |
| # def patch_tools_support_pydantic(create_completion): | |
| # def create_completion_with_tools(*, messages, max_tool_iterations=None, response_model=None, tools=[], stream=None, **kwargs): | |
| # if tools: | |
| # assert response_model is not None, "Cannot use tools without response_model" | |
| # assert stream is None, "stream is not supported with tools" | |
| # messages = [{ | |
| # "role": "system", | |
| # # You're a reliable assistant. You systematically use tools instead of doing the math yourself / manually / mentally. | |
| # "content": '\n'.join([ | |
| # # "You're a reliable assistant. You process requests step by step, using tools instead of doing things yourself / manually / mentally.", | |
| # # "You're a reliable assistant. You systematically use tools instead of doing things yourself / manually / mentally.", | |
| # # "At every step, you decide whether you need to use a tool, and say which one in your thoughts.", | |
| # # "At every step, think about which tool(s) you need to use, if any (one of {', '.join(fn.__name__ for fn in tools)}), then call the next tool. When you're done, give the final result.", | |
| # # "You're a reliable assistant. You systematically use available tools instead of doing things by yourself / manually / mentally.", | |
| # f"You're a reliable assistant. You think step by step and use tools",# and can use the following tools: {', '.join(fn.__name__ for fn in tools)}.", | |
| # # "You're a reliable assistant. You pick the most appropriate tool at each step instead of doing anything yourself / manually / mentally.", | |
| # #"think about the name of the function of the next tool you need to use, if any (one of {', '.join(fn.__name__ for fn in tools)}), then call the next tool (or give the final result directly if no tool call is needed).", | |
| # # "Think about the tool to call in the next step, if any (one of {', '.join(fn.__name__ for fn in tools)}) (give the final result directly if available).", | |
| # # f"Think about which tool is needed for each step, if any (one of {', '.join(fn.__name__ for fn in tools)}).", | |
| # # f'Think about which of the following tools is needed for each step, if any:', | |
| # # *(f'- {fn.__name__}: {fn.__doc__}' for fn in tools), | |
| # # f'You can use any of the following tools:', | |
| # # *(f'- {fn.__name__}: {fn.__doc__}' for fn in tools), | |
| # # f"You think out loud about the name of the tool function you need to use for each step of the process, if any (one of {', '.join(fn.__name__ for fn in tools)})", | |
| # # f'You think about which tool you need to use for the next step', | |
| # # f"Think about which tool is needed for each step, and why it's appropriate (compare to other tools if needed). Don't forget to describe the values of the arguments you'll use.", | |
| # # f"Think about which tool is needed for each step, and why it's appropriate (compare to other tools if needed). Don't forget to describe the values of the arguments you'll use.", | |
| # ]) | |
| # }] + messages | |
| # streamed_tool_response_model = build_tool_call_adapter(response_model, *tools) | |
| # i = 0 | |
| # while (max_tool_iterations is None or i < max_tool_iterations): | |
| # # print("Completion: ", json.dumps(messages, indent=2)) | |
| # response = create_completion(messages=messages, response_model=streamed_tool_response_model, **kwargs) | |
| # # print("Got response: ", json.dumps(response.model_dump(), indent=2)) | |
| # # response = response.next_step | |
| # # print(f'(Original goal: {response.original_goal})') | |
| # print(f'π {response.thought_process}') | |
| # if callable(response.next_step): | |
| # sys.stdout.write(f'βοΈ {response.next_step.function}(args={response.next_step.args.model_dump()})' ) | |
| # result = response.next_step() | |
| # result_str = json.dumps(result.model_dump(), indent=2) if isinstance(result, BaseModel) else str(result) | |
| # sys.stdout.write(f'-> {result_str}\n') | |
| # # tool_response = f"Tool response: {result_str}" | |
| # # print(tool_response) | |
| # messages = messages + [{ | |
| # "role": "assistant", | |
| # "content": json.dumps(response.model_dump(), indent=2), | |
| # }, { | |
| # "role": "user", | |
| # "content": f"Tool {response.next_step.function} response: {result_str}" | |
| # }] | |
| # i += 1 | |
| # else: | |
| # return response.next_step.result | |
| # if max_tool_iterations is not None: | |
| # raise Exception(f"Failed to get a valid response after {max_tool_iterations} tool calls") | |
| # return create_completion(messages=messages, response_model=response_model, **kwargs) | |
| # return create_completion_with_tools | |
# Wrapper model that lets the LLM reply with an intermediate free-form thought
# instead of the final typed answer (create_completion2 unions it with the
# caller's response_model and loops while Thoughts keep coming back).
class Thought(BaseModel):
    thought: str
def build_tool_call_adapter2(final_output_type, *tools):
    """Build a TypeAdapter over the union of OpenAI-style tool-call models
    (id / type / function) generated from the given tool functions; each
    parsed tool call is callable and dispatches to the underlying function.

    NOTE(review): the model source is assembled from fn.__doc__ and
    annotations and run through exec() — a hostile docstring could inject
    code (see TODO below).
    """
    lines = [
        'from pydantic import BaseModel, TypeAdapter',
        'from typing import Literal, Union',
    ]
    # exec namespace: expose the final output type and (via locals()) the tools.
    globs = {
        **globals(),
        **locals(),
        final_output_type.__name__: final_output_type,
    }
    tool_calls = []
    for fn in tools:
        # TODO: escape fn.__doc__ to avoid comment or metadata injection!
        fn_name = fn.__name__
        fn_doc = fn.__doc__.replace('"""', "'''") if fn.__doc__ else None
        # snake_case -> PascalCase for the generated class names.
        name = fn_name.replace('_', ' ').title().replace(' ', '')
        # Fix: emit consistently indented source (4 spaces for class members,
        # 8 for method bodies) — previously member and body lines shared one
        # level, so exec() hit IndentationError on the generated __call__.
        lines += [
            f'class {name}ToolArgs(BaseModel):',
            *(f'    {k}: {type_to_str(v)}' for k, v in fn.__annotations__.items() if k != 'return'),
            f'class {name}ToolCall(BaseModel):',
            *([f'    """{fn_doc}"""'] if fn_doc else []),
            f'    name: Literal["{fn_name}"]',
            f'    arguments: {name}ToolArgs',
            f'class {name}Tool(BaseModel):',
            f'    id: str',
            f'    type: Literal["function"]',
            f'    function: {name}ToolCall',
            f'    def __call__(self) -> {type_to_str(fn.__annotations__.get("return"))}:',
            # Fix: pydantic v2 model_dump() (used elsewhere in this file)
            # instead of the deprecated .dict().
            f'        return {fn_name}(**self.function.arguments.model_dump())',
        ]
        tool_calls.append(f'{name}Tool')
    lines += [
        f'response_adapter = TypeAdapter(Union[{", ".join(tool_calls)}])',
    ]
    exec('\n'.join(lines), globs)
    return globs['response_adapter']
def create_completion2(*, response_model=None, tool_call_adapter=None, max_tool_iterations=None, tools=[], endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
    '''
    Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
    (llama.cpp server, llama-cpp-python, Anyscale / Together...)
    The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
    '''
    # NOTE(review): when response_model is falsy this function is a no-op and
    # implicitly returns None — confirm that is intended.
    if response_model:
        # The model may answer with an intermediate Thought or the final type.
        type_adapter = build_union_type_adapter(response_model, Thought)
        schema = type_adapter.json_schema()
        response_format = {"type": "json_object", "schema": schema}
        tool_call_adapter = build_tool_call_adapter2(response_model, *tools)
        tool_adapters = [(fn, TypeAdapter(fn)) for fn in tools]
        # OpenAI-style tool schemas derived from each function's signature/doc.
        tools_schemas = [{
            "type": "function",
            "function": {
                "name": fn.__name__,
                "description": fn.__doc__,
                "parameters": ta.json_schema()
            }
        } for (fn, ta) in tool_adapters]
        messages = [{
            "role": "system",
            "content": '\n'.join([
                'Before calling each tool, you think clearly and briefly about why and how you are using the tool.',
            ])
        }] + messages
        i = 0
        while (max_tool_iterations is None or i < max_tool_iterations):
            body = dict(
                messages=messages,
                response_format=response_format,
                tools=tools_schemas,
                **kwargs
            )
            data = requests.post(
                endpoint,
                headers={"Content-Type": "application/json"},
                json=body,
            ).json()
            if 'error' in data:
                raise Exception(data['error']['message'])
            choice = data["choices"][0]
            if choice.get("finish_reason") == "tool_calls":
                # Fix: in the OpenAI chat-completions format tool calls live on
                # the assistant *message* (choice["message"]["tool_calls"]),
                # not on the choice itself — the old lookup raised KeyError.
                for tool_call in choice["message"]["tool_calls"]:
                    tc = tool_call_adapter.validate_json(json.dumps(tool_call))
                    sys.stdout.write(f'βοΈ {tc.function.name}({", ".join(f"{k}={v}" for k, v in tc.function.arguments.model_dump().items())})' )
                    result = tc()
                    sys.stdout.write(f" -> {result}\n")
                    # NOTE(review): the OpenAI protocol also expects the
                    # assistant message carrying the tool_calls to be appended
                    # before these "tool" responses — confirm the server
                    # tolerates its absence.
                    messages.append({
                        "tool_call_id": tc.id,
                        "role": "tool",
                        "name": tc.function.name,
                        "content": f'{result}',
                    })
            else:
                content = choice["message"]["content"]
                result = type_adapter.validate_json(content) if type_adapter else content
                if isinstance(result, Thought):
                    # Intermediate reasoning step: echo it, record it, loop on.
                    print(f'π {result.thought}')
                    messages.append({
                        "role": "assistant",
                        "content": json.dumps(result.model_dump(), indent=2),
                    })
                else:
                    return result
            i += 1
        if max_tool_iterations is not None:
            raise Exception(f"Failed to get a valid response after {max_tool_iterations} tool calls")
def patch_tools_support_openai(create_completion):
    """Wrap create_completion for tool usage; currently a stub that only
    validates the tool-related preconditions and forwards the call (the
    tools themselves are not passed through)."""
    def create_completion_with_tools(*, messages, max_tool_iterations=None, response_model=None, tools=[], stream=None, **kwargs):
        if tools:
            assert response_model is not None, "Cannot use tools without response_model"
            assert stream is None, "stream is not supported with tools"
        # Forward without tools / stream / max_tool_iterations (consumed here).
        return create_completion(messages=messages, response_model=response_model, **kwargs)
    return create_completion_with_tools
| # create_completion = patch_tools_support_pydantic(create_completion) | |
| # create_completion = patch_tools_support_openai(create_completion) | |
if __name__ == '__main__':

    class QAPair(BaseModel):
        # One trivia-style question/answer entry of a summary section.
        question: str
        concise_answer: str
        justification: str

    class PyramidalSummary(BaseModel):
        # Recursive top-down summary: title + summary + at least 2 Q&A pairs,
        # with optional nested sub-sections of the same shape.
        title: str
        summary: str
        question_answers: Annotated[List[QAPair], MinLen(2)]
        sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]

    # Example (structured output, no tools):
    # print("# Summary\n", create_completion(
    #     model="...",
    #     response_model=PyramidalSummary,
    #     messages=[{
    #         "role": "user",
    #         "content": "You are a highly efficient corporate document summarizer. "
    #                    "Create a pyramidal summary of an imaginary internal document about our company processes "
    #                    "(starting high-level, going down to each sub sections). "
    #                    "Keep questions short, and answers even shorter (trivia / quizz style)."
    #     }]))

    import math

    def eval_python_expression(expr: str) -> float:
        """
        Evaluate a Python expression reliably.
        This can be used to compute complex nested mathematical expressions, or any python, really.
        """
        # Stub: deliberately does NOT eval() the model-supplied expression
        # (that would be unsafe). Returns a float to match the annotation
        # (was the string "0.0"). Currently commented out of the tools list.
        print("# Evaluating expression: ", expr)
        return 0.0

    def add(a: float, b: float) -> float:
        """
        Add two numbers reliably.
        Don't use this tool to compute the square of a number (use multiply or pow instead)
        """
        return a + b

    # def say(something: str) -> str:
    #     """
    #     Just says something. Used to say each thought out loud
    #     """
    #     return subprocess.check_call(["say", something])

    def multiply(a: float, b: float) -> float:
        """Multiply a with b numbers reliably"""
        return a * b

    def divide(a: float, b: float) -> float:
        """Divide a by b two numbers reliably"""
        return a / b

    # NOTE(review): shadows the builtin `pow` — kept, since the function name
    # is exposed to the model as the tool name (and referenced in the trace below).
    def pow(value: float, power: float) -> float:
        """
        Raise a value to a power (exponent) reliably.
        The square of x is pow(x, 2), its cube is pow(x, 3), etc.
        """
        return math.pow(value, power)

    result = create_completion2(
        model="...",
        response_model=str,
        tools=[add, multiply, divide, pow],  # , say], #, eval_python_expression],
        temperature=0.0,  # deterministic decoding → reproducible trace
        messages=[{
            "role": "system",
            "content": """
            You are a reliable assistant. You think step by step and think before using tools
            """
        }, {
            "role": "user",
            "content": """
            What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?
            """
        }])

    # Expected trace from a previous run:
    # 💭 First, I need to square the number 2535. For this, I will use the 'pow' tool.
    # ⚙️ pow(args={'value': 2535.0, 'power': 2.0})-> 6426225.0
    # 💭 Now that I have the square of 2535, I need to add it to 32222000403.0 and store the result.
    # ⚙️ add(args={'a': 6426225.0, 'b': 32222000403.0})-> 32228426628.0
    # 💭 Now that I have the sum of 2535 squared and 32222000403, I need to multiply it by 1.5.
    # ⚙️ pow(args={'value': 32228426628.0, 'power': 1.5})-> 5785736571757004.0
    # 💭 Now that I have the result of the sum multiplied by 1.5, I need to divide it by 3 to get a third of the result.
    # ⚙️ divide(args={'a': 5785736571757004.0, 'b': 3.0})-> 1928578857252334.8
    # 💭 I have now calculated a third of the result, which is 1928578857252334.8. I can now share this as the final answer.
    # Result: 1928578857252334.8

    expected_result = (2535 ** 2 + 32222000403) * 1.5 / 3.0
    print("➡️", result)
    # response_model=str means the answer may come back as a string — coerce it
    # before the numeric comparison (the original subtracted a str from a float,
    # which raises TypeError). float() is a no-op if `result` is already numeric.
    assert math.fabs(float(result) - expected_result) < 0.0001, f"Expected {expected_result}, got {result}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment