@thoraxe · Created February 19, 2025 15:11

Llama Stack agent script combining a client tool (get_ticker_data) with the builtin code interpreter, followed by the server log and the traceback it produced.
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio

import fire
from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import AgentConfig

from ticker_data import get_ticker_data


async def run_main(host: str, port: int, disable_safety: bool = False):
    client = LlamaStackClient(
        base_url=f"http://{host}:{port}",
    )

    # Use every registered shield for input/output checks, if any exist.
    available_shields = [shield.identifier for shield in client.shields.list()]
    if not available_shields:
        print("No available shields. Disabling safety.")
    else:
        print(f"Available shields found: {available_shields}")

    # Pick the first non-Vision Llama 3.2 model the server serves.
    available_models = [
        model.identifier for model in client.models.list() if model.model_type == "llm"
    ]
    supported_models = [x for x in available_models if "3.2" in x and "Vision" not in x]
    if not supported_models:
        raise ValueError(
            "No supported models found. Make sure to have a Llama 3.2 model."
        )
    selected_model = supported_models[0]
    print(f"Using model: {selected_model}")

    # Client-side tool, executed locally by the agent loop.
    client_tools = [get_ticker_data]

    agent_config = AgentConfig(
        model=selected_model,
        instructions="""You are a helpful assistant with access to the following
        function calls. Your task is to produce a list of function calls
        necessary to generate a response to the user utterance. Use the following
        function calls as required.""",
        sampling_params={
            "strategy": {"type": "top_p", "temperature": 1.0, "top_p": 0.9},
        },
        # Server-side builtin tool, combined with the client tool above.
        toolgroups=[
            "builtin::code_interpreter",
        ],
        client_tools=[
            client_tool.get_tool_definition() for client_tool in client_tools
        ],
        tool_choice="auto",
        tool_prompt_format="python_list",  # model emits tool calls as a Python list
        input_shields=available_shields if available_shields else [],
        output_shields=available_shields if available_shields else [],
        enable_session_persistence=False,
    )

    agent = Agent(client, agent_config, client_tools)
    session_id = agent.create_session("test-session")
    print(f"Created session_id={session_id} for Agent({agent.agent_id})")

    user_prompts = [
        "What was the closing price of Google stock (ticker symbol GOOG) for 2023?"
    ]
    for prompt in user_prompts:
        response = agent.create_turn(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            session_id=session_id,
        )
        # Stream and pretty-print the turn's events as they arrive.
        for log in EventLogger().log(response):
            log.print()


def main(host: str, port: int):
    asyncio.run(run_main(host, port))


if __name__ == "__main__":
    fire.Fire(main)
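The ticker_data module the script imports is not included in the gist. Below is a minimal sketch of what get_ticker_data could look like, assuming the @client_tool decorator from llama_stack_client.lib.agents.client_tool (which builds the definition returned by get_tool_definition() from the function's signature and docstring, in llama-stack-client releases from around this time) and yfinance for the price data; the parameter names, docstring, and return format are guesses, not the gist author's code:

# ticker_data.py -- hypothetical sketch, not part of the original gist
from llama_stack_client.lib.agents.client_tool import client_tool


@client_tool
def get_ticker_data(ticker_symbol: str, start: str, end: str) -> str:
    """
    Get closing prices for a given ticker symbol over a date range.

    :param ticker_symbol: ticker symbol to look up, e.g. 'GOOG'
    :param start: start date, e.g. '2023-01-01'
    :param end: end date, e.g. '2023-12-31'
    """
    import yfinance as yf

    # Daily history for the window; keep only the closing prices.
    closes = yf.download(ticker_symbol, start=start, end=end)["Close"].dropna()
    return closes.to_json(date_format="iso")

With a Llama Stack server listening on port 5000, the script would be run as, e.g., python agent.py localhost 5000 (fire maps the positional arguments to host and port; the filename is illustrative). The server-side output below shows the shields, models, agent, and session calls all succeeding before the turn itself fails: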
Listening on ['::', '0.0.0.0']:5000
INFO: Started server process [3094321]
INFO: Waiting for application startup.
INFO: ASGI 'lifespan' protocol appears unsupported.
INFO: Application startup complete.
INFO: Uvicorn running on http://['::', '0.0.0.0']:5000 (Press CTRL+C to quit)
INFO: ::1:43446 - "GET /v1/shields HTTP/1.1" 200 OK
15:09:38.783 [START] /v1/shields
INFO: ::1:43446 - "GET /v1/models HTTP/1.1" 200 OK
INFO: ::1:43446 - "POST /v1/agents HTTP/1.1" 200 OK
15:09:38.795 [END] /v1/shields [StatusCode.OK] (12.01ms)
15:09:38.797 [START] /v1/models
INFO: ::1:43446 - "GET /v1/tools?toolgroup_id=builtin%3A%3Acode_interpreter HTTP/1.1" 200 OK
INFO: ::1:43446 - "POST /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session HTTP/1.1" 200 OK
15:09:38.802 [END] /v1/models [StatusCode.OK] (5.04ms)
15:09:38.806 [START] /v1/agents
INFO: ::1:43446 - "POST /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session/6ad059a3-4752-4a76-b2d2-fc7c9fee9cd8/turn HTTP/1.1" 200 OK
15:09:38.814 [END] /v1/agents [StatusCode.OK] (8.59ms)
15:09:38.818 [START] /v1/tools
15:09:38.821 [END] /v1/tools [StatusCode.OK] (3.09ms)
15:09:38.825 [START] /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session
15:09:38.834 [END] /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session [StatusCode.OK] (8.66ms)
15:09:38.836 [START] /v1/agents/94a0fd3b-b1e7-49b9-86fa-c71fd21dd7cd/session/6ad059a3-4752-4a76-b2d2-fc7c9fee9cd8/turn
15:09:38.853 [START] create_and_execute_turn
15:09:38.856 [START] inference
The turn then dies inside the vLLM remote inference provider while it builds the outgoing request:

Traceback (most recent call last):
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/distribution/server/server.py", line 208, in sse_generator
    async for item in event_gen:
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/inline/agents/meta_reference/agents.py", line 169, in _create_agent_turn_streaming
    async for event in agent.create_and_execute_turn(request):
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/inline/agents/meta_reference/agent_instance.py", line 189, in create_and_execute_turn
    async for chunk in self.run(
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/inline/agents/meta_reference/agent_instance.py", line 258, in run
    async for res in self._run(
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/inline/agents/meta_reference/agent_instance.py", line 499, in _run
    async for chunk in await self.inference_api.chat_completion(
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/distribution/routers/routers.py", line 182, in <genexpr>
    return (chunk async for chunk in await provider.chat_completion(**params))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/remote/inference/vllm/vllm.py", line 290, in _stream_chat_completion
    async for chunk in res:
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/remote/inference/vllm/vllm.py", line 150, in _process_vllm_chat_completion_stream_response
    async for chunk in stream:
  File "/home/thoraxe/Red_Hat/openshift/llamaindex-experiments/llama-stack/llama_stack/providers/remote/inference/vllm/vllm.py", line 281, in _to_async_generator
    s = client.chat.completions.create(**params)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_utils/_utils.py", line 279, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/resources/chat/completions.py", line 863, in create
    return self._post(
           ^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_base_client.py", line 1283, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_base_client.py", line 960, in request
    return self._request(
           ^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_base_client.py", line 986, in _request
    request = self._build_request(options, retries_taken=retries_taken)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/openai/_base_client.py", line 506, in _build_request
    return self._client.build_request(  # pyright: ignore[reportUnknownMemberType]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/httpx/_client.py", line 358, in build_request
    return Request(
           ^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/httpx/_models.py", line 342, in __init__
    headers, stream = encode_request(
                      ^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/httpx/_content.py", line 214, in encode_request
    return encode_json(json)
           ^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/envs/ols-llamastack/lib/python3.11/site-packages/httpx/_content.py", line 177, in encode_json
    body = json_dumps(json).encode("utf-8")
           ^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/lib/python3.11/json/__init__.py", line 231, in dumps
    return _default_encoder.encode(obj)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/lib/python3.11/json/encoder.py", line 200, in encode
    chunks = self.iterencode(o, _one_shot=True)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/lib/python3.11/json/encoder.py", line 258, in iterencode
    return _iterencode(o, 0)
           ^^^^^^^^^^^^^^^^^
  File "/home/thoraxe/.pyenv/versions/3.11.5/lib/python3.11/json/encoder.py", line 180, in default
    raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type BuiltinTool is not JSON serializable
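The request fails while the vLLM provider converts the agent's tools into an OpenAI-compatible chat request: the definition for builtin::code_interpreter carries its name as a BuiltinTool enum member rather than a plain string, and the stdlib JSON encoder that httpx uses to serialize the request body cannot handle enums. A minimal sketch of the failure mode, using a reduced stand-in for the BuiltinTool enum shipped with llama-models:

import json
from enum import Enum


class BuiltinTool(Enum):
    # Reduced stand-in for the real BuiltinTool enum.
    code_interpreter = "code_interpreter"


params = {"tools": [{"name": BuiltinTool.code_interpreter}]}
try:
    json.dumps(params)  # roughly what httpx's encode_json() does
except TypeError as exc:
    print(exc)  # Object of type BuiltinTool is not JSON serializable

# Converting the enum to its string value before the request is built avoids it:
print(json.dumps({"tools": [{"name": BuiltinTool.code_interpreter.value}]}))

This points at the provider's tool-conversion step (presumably where vllm.py builds params, mapping the tool name to its .value when it is a BuiltinTool) rather than at anything in the client script above.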