@thoraxe · Created February 19, 2025 15:11

Llama Stack agent script combining a client tool (get_ticker_data) with the builtin code interpreter, followed by the server log and the traceback it produced.
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio

import fire
from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import AgentConfig

from ticker_data import get_ticker_data


async def run_main(host: str, port: int, disable_safety: bool = False):
    client = LlamaStackClient(
        base_url=f"http://{host}:{port}",
    )

    # Use every registered shield for input/output checks, if any exist.
    available_shields = [shield.identifier for shield in client.shields.list()]
    if not available_shields:
        print("No available shields. Disabling safety.")
    else:
        print(f"Available shields found: {available_shields}")

    # Pick the first non-Vision Llama 3.2 model the server serves.
    available_models = [
        model.identifier for model in client.models.list() if model.model_type == "llm"
    ]
    supported_models = [x for x in available_models if "3.2" in x and "Vision" not in x]
    if not supported_models:
        raise ValueError(
            "No supported models found. Make sure to have a Llama 3.2 model."
        )
    selected_model = supported_models[0]
    print(f"Using model: {selected_model}")

    # Client-side tool, executed locally by the agent loop.
    client_tools = [get_ticker_data]

    agent_config = AgentConfig(
        model=selected_model,
        instructions="""You are a helpful assistant with access to the following
        function calls. Your task is to produce a list of function calls
        necessary to generate a response to the user utterance. Use the following
        function calls as required.""",
        sampling_params={
            "strategy": {"type": "top_p", "temperature": 1.0, "top_p": 0.9},
        },
        # Server-side builtin tool, combined with the client tool above.
        toolgroups=[
            "builtin::code_interpreter",
        ],
        client_tools=[
            client_tool.get_tool_definition() for client_tool in client_tools
        ],
        tool_choice="auto",
        tool_prompt_format="python_list",  # model emits tool calls as a Python list
        input_shields=available_shields if available_shields else [],
        output_shields=available_shields if available_shields else [],
        enable_session_persistence=False,
    )

    agent = Agent(client, agent_config, client_tools)
    session_id = agent.create_session("test-session")
    print(f"Created session_id={session_id} for Agent({agent.agent_id})")

    user_prompts = [
        "What was the closing price of Google stock (ticker symbol GOOG) for 2023?"
    ]
    for prompt in user_prompts:
        response = agent.create_turn(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            session_id=session_id,
        )
        # Stream and pretty-print the turn's events as they arrive.
        for log in EventLogger().log(response):
            log.print()


def main(host: str, port: int):
    asyncio.run(run_main(host, port))


if __name__ == "__main__":
    fire.Fire(main)
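The ticker_data module the script imports is not included in the gist. Below is a minimal sketch of what get_ticker_data could look like, assuming the @client_tool decorator from llama_stack_client.lib.agents.client_tool (which builds the definition returned by get_tool_definition() from the function's signature and docstring, in llama-stack-client releases from around this time) and yfinance for the price data; the parameter names, docstring, and return format are guesses, not the gist author's code:

# ticker_data.py -- hypothetical sketch, not part of the original gist
from llama_stack_client.lib.agents.client_tool import client_tool


@client_tool
def get_ticker_data(ticker_symbol: str, start: str, end: str) -> str:
    """
    Get closing prices for a given ticker symbol over a date range.

    :param ticker_symbol: ticker symbol to look up, e.g. 'GOOG'
    :param start: start date, e.g. '2023-01-01'
    :param end: end date, e.g. '2023-12-31'
    """
    import yfinance as yf

    # Daily history for the window; keep only the closing prices.
    closes = yf.download(ticker_symbol, start=start, end=end)["Close"].dropna()
    return closes.to_json(date_format="iso")

With a Llama Stack server listening on port 5000, the script would be run as, e.g., python agent.py localhost 5000 (fire maps the positional arguments to host and port; the filename is illustrative). The server-side output below shows the shields, models, agent, and session calls all succeeding before the turn itself fails: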
Listening on ['::', '0.0.0.0']:5000
INFO: Started server process [3094321]
INFO: Waiting for application startup.
INFO: ASGI 'lifespan' protocol appears unsupported.
INFO: Application startup complete.
INFO: Uvicorn running on http://['::', '0.0.0.0']:5000 (Press CTRL+C to quit)
INFO: ::1:43446 - "GET /v1/shields HTTP/1.1" 200 OK
15:09:38.783 [START] /v1/shields
INFO: ::1:43446 - "GET /v1/models HTTP/1.1" 200 OK
INFO: ::1:43446 - "POST /v1/agents HTTP/1.1" 200 OK
15:09:38.795 [END] /v1/shields [StatusCode.OK] (12.01ms)
15:09:38.797 [START] /v1/models
INFO: ::1:43446 - "GET /v1/tools?toolgroup_id=builtin%3A%3Acode_interpreter HTTP/1.1" 200 OK
INFO: ::1:43446 - "POST /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session HTTP/1.1" 200 OK
15:09:38.802 [END] /v1/models [StatusCode.OK] (5.04ms)
15:09:38.806 [START] /v1/agents
INFO: ::1:43446 - "POST /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session/6ad059a3-4752-4a76-b2d2-fc7c9fee9cd8/turn HTTP/1.1" 200 OK
15:09:38.814 [END] /v1/agents [StatusCode.OK] (8.59ms)
15:09:38.818 [START] /v1/tools
15:09:38.821 [END] /v1/tools [StatusCode.OK] (3.09ms)
15:09:38.825 [START] /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session
15:09:38.834 [END] /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session [StatusCode.OK] (8.66ms)
15:09:38.836 [START] /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session/6ad059a3-4752-4a76-b2d2-fc7c9fee9cd8/turn
15:09:38.853 [START] create_and_execute_turn
15:09:38.856 [START] inference
The turn then dies inside the vLLM remote inference provider while it builds the outgoing request:

Traceback (most recent call last):
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/distribution/server/server.py", line 208, in sse_generator
    async for item in event_gen:
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/inline/agents/meta_reference/agents.py", line 169, in _create_agent_turn_streaming
    async for event in agent.create_and_execute_turn(request):
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/inline/agents/meta_reference/agent_instance.py", line 189, in create_and_execute_turn
    async for chunk in self.run(
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/inline/agents/meta_reference/agent_instance.py", line 258, in run
    async for res in self._run(
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/inline/agents/meta_reference/agent_instance.py", line 499, in _run
    async for chunk in await self.inference_api.chat_completion(
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/distribution/routers/routers.py", line 182, in <genexpr>
    return (chunk async for chunk in await provider.chat_completion(**params))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/remote/inference/vllm/vllm.py", line 290, in _stream_chat_completion
    async for chunk in res:
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/remote/inference/vllm/vllm.py", line 150, in _process_vllm_chat_completion_stream_response
    async for chunk in stream:
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/remote/inference/vllm/vllm.py", line 281, in _to_async_generator
    s = client.chat.completions.create(**params)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_utils/_utils.py", line 279, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/resources/chat/completions.py", line 863, in create
    return self._post(
           ^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_base_client.py", line 1283, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_base_client.py", line 960, in request
    return self._request(
           ^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_base_client.py", line 986, in _request
    request = self._build_request(options, retries_taken=retries_taken)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_base_client.py", line 506, in _build_request
    return self._client.build_request(  # pyright: ignore[reportUnknownMemberType]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/httpx/_client.py", line 358, in build_request
    return Request(
           ^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/httpx/_models.py", line 342, in __init__
    headers, stream = encode_request(
                      ^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/httpx/_content.py", line 214, in encode_request
    return encode_json(json)
           ^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/httpx/_content.py", line 177, in encode_json
    body = json_dumps(json).encode("utf-8")
           ^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/lib/python3.11/json/__init__.py", line 231, in dumps
    return _default_encoder.encode(obj)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/lib/python3.11/json/encoder.py", line 200, in encode
    chunks = self.iterencode(o, _one_shot=True)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/lib/python3.11/json/encoder.py", line 258, in iterencode
    return _iterencode(o, 0)
           ^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/lib/python3.11/json/encoder.py", line 180, in default
    raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type BuiltinTool is not JSON serializable
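The request fails while the vLLM provider converts the agent's tools into an OpenAI-compatible chat request: the definition for builtin::code_interpreter carries its name as a BuiltinTool enum member rather than a plain string, and the stdlib JSON encoder that httpx uses to serialize the request body cannot handle enums. A minimal sketch of the failure mode, using a reduced stand-in for the BuiltinTool enum shipped with llama-models:

import json
from enum import Enum


class BuiltinTool(Enum):
    # Reduced stand-in for the real BuiltinTool enum.
    code_interpreter = "code_interpreter"


params = {"tools": [{"name": BuiltinTool.code_interpreter}]}
try:
    json.dumps(params)  # roughly what httpx's encode_json() does
except TypeError as exc:
    print(exc)  # Object of type BuiltinTool is not JSON serializable

# Converting the enum to its string value before the request is built avoids it:
print(json.dumps({"tools": [{"name": BuiltinTool.code_interpreter.value}]}))

This points at the provider's tool-conversion step (presumably where vllm.py builds params, mapping the tool name to its .value when it is a BuiltinTool) rather than at anything in the client script above.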