neubig · June 29, 2026 23:28
diff --git a/fusion_harness_example.py b/fusion_harness_example.py
 """Fusion-style delegation harness built with the OpenHands SDK.

 Install:
    uv pip install openhands-sdk openhands-tools

 Run:
    export LLM_API_KEY="..."  # or export OPENHANDS_API_KEY="..."
    export MAIN_MODEL="openhands/gpt-5.5"
    export SIDEKICK_MODEL="openhands/minimax-m2.7"
    uv run python fusion_harness_example.py "Find and fix the failing tests in this repo."

 What this demonstrates:
 - the main agent keeps one high-capability LLM profile for the whole task
 - the cheap sidekick is registered as a sub-agent with its own LLM profile
 - the main agent can issue several task-tool calls in one response
 - tool_concurrency_limit lets those sidekick calls run concurrently
 - sidekick returns ESCALATE_TO_MAIN when work exceeds its budget
 """

 from __future__ import annotations

 import os
 import sys
 import tempfile
 from pathlib import Path

 from pydantic import SecretStr

 from openhands.sdk import Agent, AgentContext, Conversation, LLM, LLMProfileStore, Tool
 from openhands.sdk.context import Skill
 from openhands.sdk.subagent import register_agent
 from openhands.sdk.subagent.schema import AgentDefinition
 from openhands.tools.delegate import DelegationVisualizer
 from openhands.tools.file_editor import FileEditorTool
 from openhands.tools.task import TaskToolSet
 from openhands.tools.terminal import TerminalTool


 # Fallback model identifiers, used when the MAIN_MODEL / SIDEKICK_MODEL
 # environment variables are not set.
 DEFAULT_MAIN_MODEL = "openhands/gpt-5.5"
 DEFAULT_SIDEKICK_MODEL = "openhands/minimax-m2.7"


 # Protocol text given to the sidekick sub-agent as the content of a Skill
 # (registered with trigger=None). It defines the two allowed reply shapes:
 # an ESCALATE_TO_MAIN line, or a FINDINGS / PROPOSED_NEXT_STEP / RISK report.
 SIDEKICK_SKILL = """
 You are a fast, low-cost sidekick agent. Your job is to help the main agent,
 not to complete the whole user request on your own.

 Rules:
 1. Prefer read-only investigation: inspect files, locate relevant code, propose
   small plans, and draft patch sketches. Do not edit files unless the prompt
   explicitly asks you to.
 2. Keep output compact and structured. Use at most three tool calls unless the
   prompt explicitly allows more. Prefer broad signals over exhaustive reading.
 3. If the task requires broad architecture decisions, many-file edits, unknown
   product judgement, security-sensitive changes, or you are not confident,
   stop and return exactly:

   ESCALATE_TO_MAIN: <short reason>

 4. Otherwise return:

   FINDINGS:
   - ...

   PROPOSED_NEXT_STEP:
   - ...

   RISK:
   - low|medium|high, with one sentence why
 """.strip()


 # Instructions passed to the main agent as its AgentContext
 # system_message_suffix: delegate independent areas to sidekicks in one
 # tool-call batch, then review their reports before acting.
 ORCHESTRATOR_SUFFIX = """
 You are the main high-capability agent in a fusion-style harness.

 Critical parallel-delegation protocol:
 - If the user lists multiple independent areas, your initial delegation step MUST
  be a single assistant response containing one `task` tool call per area, all
  with `subagent_type='sidekick'`.
 - Do not delegate only the first area and wait. Do not say you will launch
  several sidekicks and then call only one. Actually emit all sidekick task calls
  in the same tool-call batch so `tool_concurrency_limit` can run them in
  parallel.
 - Before the initial delegation batch, avoid direct terminal/file-editor work
  unless the user did not provide enough information to form sidekick prompts.
 - After all sidekick observations return, review them yourself, deduplicate, and
  make final prioritization with the main model.

 Good sidekick tasks:
 - locate relevant files
 - inspect test failures or logs
 - summarize a narrow subsystem
 - draft a small patch plan
 - check docs or dependency files

 Do not delegate broad design, final decisions, risky edits, or cross-cutting
 implementation. If any sidekick returns `ESCALATE_TO_MAIN`, stop delegating that
 thread and handle it yourself with the main model.

 Always review sidekick output before acting. Treat sidekick output as advisory,
 not authoritative. The final answer and any code changes are your responsibility.
 """.strip()


 def require_api_key() -> str:
    """Return the API key taken from the environment.

    ``LLM_API_KEY`` is consulted first, then ``OPENHANDS_API_KEY``. Values are
    stripped of surrounding whitespace, so a blank or whitespace-only variable
    counts as unset and the next variable is tried.

    Returns:
        The first non-blank key found, without surrounding whitespace.

    Raises:
        RuntimeError: If neither variable holds a non-blank value.
    """
    for variable in ("LLM_API_KEY", "OPENHANDS_API_KEY"):
        candidate = os.getenv(variable, "").strip()
        if candidate:
            return candidate
    raise RuntimeError(
        "Set LLM_API_KEY, or OPENHANDS_API_KEY when using OpenHands-hosted models."
    )


 def save_profile(
    store: LLMProfileStore,
    name: str,
    usage_id: str,
    model: str,
    base_url: str | None,
 ) -> None:
    """Store an LLM profile under ``name`` in ``store``.

    The profile is built from ``usage_id``, ``model`` and ``base_url`` only
    and is saved with ``include_secrets=False``.
    """
    profile = LLM(usage_id=usage_id, model=model, base_url=base_url)
    store.save(name, profile, include_secrets=False)


 def load_profile(store: LLMProfileStore, name: str, api_key: str) -> LLM:
    """Load profile ``name`` from ``store`` and return a copy carrying ``api_key``.

    The key is wrapped in ``SecretStr`` and applied through ``model_copy``.
    """
    stored_profile = store.load(name)
    overrides = {"api_key": SecretStr(api_key)}
    return stored_profile.model_copy(update=overrides)


 def build_profiles(api_key: str) -> tuple[LLMProfileStore, LLM]:
    """Create a profile store holding the main and sidekick LLM profiles.

    Models come from ``MAIN_MODEL`` / ``SIDEKICK_MODEL`` (falling back to the
    module defaults) and both profiles share ``LLM_BASE_URL``. The store lives
    in a fresh temporary directory.

    Returns:
        The store, and the "fusion-main" profile loaded with ``api_key``.
    """
    endpoint = os.getenv("LLM_BASE_URL")
    profiles = (
        ("fusion-main", "main", os.getenv("MAIN_MODEL", DEFAULT_MAIN_MODEL)),
        (
            "fusion-sidekick",
            "sidekick",
            os.getenv("SIDEKICK_MODEL", DEFAULT_SIDEKICK_MODEL),
        ),
    )
    scratch_dir = Path(tempfile.mkdtemp(prefix="openhands-fusion-profiles-"))
    store = LLMProfileStore(base_dir=str(scratch_dir))

    for profile_name, usage_id, model in profiles:
        save_profile(
            store,
            name=profile_name,
            usage_id=usage_id,
            model=model,
            base_url=endpoint,
        )

    main_llm = load_profile(store, "fusion-main", api_key)
    return store, main_llm


 def register_sidekick(store: LLMProfileStore, api_key: str) -> None:
    """Register the "sidekick" sub-agent type.

    The registered factory builds an agent from the "fusion-sidekick" profile
    in ``store`` with terminal and file-editor tools and the sidekick protocol
    skill. Per-run iteration and budget limits are read from
    ``SIDEKICK_MAX_ITERATIONS`` and ``SIDEKICK_MAX_BUDGET``.
    """

    def create_sidekick(_: LLM) -> Agent:
        # The LLM passed to the factory is ignored; the sidekick always uses
        # its own stored profile.
        sidekick_llm = load_profile(store, "fusion-sidekick", api_key)
        toolbox = [
            Tool(name=TerminalTool.name),
            Tool(name=FileEditorTool.name),
        ]
        protocol = Skill(
            name="fusion_sidekick_protocol",
            content=SIDEKICK_SKILL,
            trigger=None,
        )
        context = AgentContext(
            skills=[protocol],
            system_message_suffix="Stay within the sidekick protocol.",
        )
        return Agent(
            llm=sidekick_llm,
            tools=toolbox,
            tool_concurrency_limit=3,
            agent_context=context,
        )

    iteration_cap = int(os.getenv("SIDEKICK_MAX_ITERATIONS", "6"))
    budget_cap = float(os.getenv("SIDEKICK_MAX_BUDGET", "0.10"))
    definition = AgentDefinition(
        name="sidekick",
        description=(
            "Fast low-cost sub-agent for bounded investigation, patch "
            "sketches, and escalation signals."
        ),
        max_iteration_per_run=iteration_cap,
        max_budget_per_run=budget_cap,
    )
    register_agent(
        name="sidekick",
        factory_func=create_sidekick,
        description=definition,
    )


 def build_main_agent(main_llm: LLM) -> Agent:
    """Build the orchestrating agent around ``main_llm``.

    It gets the task, terminal and file-editor tools, the orchestrator system
    message suffix, and a tool concurrency limit read from
    ``MAIN_TOOL_CONCURRENCY`` (default 8).
    """
    tool_classes = (TaskToolSet, TerminalTool, FileEditorTool)
    tools = [Tool(name=tool_cls.name) for tool_cls in tool_classes]
    concurrency = int(os.getenv("MAIN_TOOL_CONCURRENCY", "8"))
    context = AgentContext(system_message_suffix=ORCHESTRATOR_SUFFIX)
    return Agent(
        llm=main_llm,
        tools=tools,
        tool_concurrency_limit=concurrency,
        agent_context=context,
    )


 def fusion_prompt(user_task: str) -> str:
    """Wrap ``user_task`` in the fusion-workflow instructions for the main agent.

    The task text is substituted into a fixed template and the result is
    returned with leading and trailing whitespace removed.
    """
    template = """
 User task:
 {user_task}

 Run this in a fusion-style workflow.

 MANDATORY INITIAL DELEGATION RULE:
 If the user task contains multiple independent areas, directories, files,
 questions, or investigation threads, your next tool-using assistant response MUST
 contain one `task` tool call for every independent item, all in the same response,
 all with `subagent_type='sidekick'`. This is the core behavior being tested. Do
 not call only one task. Do not perform direct terminal/file_editor investigation
 first. Do not wait between sidekick launches.

 After that parallel task batch:
 1. Wait for all sidekick observations.
 2. Review the sidekick reports yourself.
 3. If any sidekick escalates or the work becomes complex, continue with the main
   model rather than switching models or restarting the conversation.
 4. Complete the task and summarize what was done.
 """
    return template.format(user_task=user_task).strip()


 def run(user_task: str, workspace: Path) -> None:
    """Run ``user_task`` through the fusion harness in ``workspace``.

    Sets up the profiles and the sidekick sub-agent, starts a conversation
    with the main agent, sends the fusion prompt, runs the conversation, and
    prints the combined accumulated cost.
    """
    api_key = require_api_key()
    store, main_llm = build_profiles(api_key)
    register_sidekick(store, api_key)

    orchestrator = build_main_agent(main_llm)
    visualizer = DelegationVisualizer(name="Fusion main")
    # Conversation state is persisted in a fresh temporary directory.
    run_dir = Path(tempfile.mkdtemp(prefix="openhands-fusion-run-"))
    iteration_cap = int(os.getenv("MAIN_MAX_ITERATIONS", "40"))

    conversation = Conversation(
        agent=orchestrator,
        workspace=workspace,
        visualizer=visualizer,
        persistence_dir=run_dir,
        max_iteration_per_run=iteration_cap,
    )
    prompt = fusion_prompt(user_task)
    conversation.send_message(prompt)
    conversation.run()

    metrics = conversation.conversation_stats.get_combined_metrics()
    print(f"\nTotal estimated cost: ${metrics.accumulated_cost:.6f}")


 def main() -> None:
    """Run the harness on the command-line arguments, or on a default task.

    All arguments are joined with spaces into one task string; when that is
    empty, a generic repository-analysis task is used. The current working
    directory is the workspace.
    """
    fallback_task = "Analyze this repository and suggest the smallest useful improvement."
    requested_task = " ".join(sys.argv[1:]).strip()
    run(user_task=requested_task or fallback_task, workspace=Path.cwd())


 # Script entry point: run the harness only when executed directly.
 if __name__ == "__main__":
    main()
	"""Fusion-style delegation harness built with the OpenHands SDK.

	Install:
	uv pip install openhands-sdk openhands-tools

	Run:
	export LLM_API_KEY="..." # or export OPENHANDS_API_KEY="..."
	export MAIN_MODEL="openhands/gpt-5.5"
	export SIDEKICK_MODEL="openhands/minimax-m2.7"
	uv run python fusion_harness_example.py "Find and fix the failing tests in this repo."

	What this demonstrates:
	- the main agent keeps one high-capability LLM profile for the whole task
	- the cheap sidekick is registered as a sub-agent with its own LLM profile
	- the main agent can issue several task-tool calls in one response
	- tool_concurrency_limit lets those sidekick calls run concurrently
	- sidekick returns ESCALATE_TO_MAIN when work exceeds its budget
	"""

	from __future__ import annotations

	import os
	import sys
	import tempfile
	from pathlib import Path

	from pydantic import SecretStr

	from openhands.sdk import Agent, AgentContext, Conversation, LLM, LLMProfileStore, Tool
	from openhands.sdk.context import Skill
	from openhands.sdk.subagent import register_agent
	from openhands.sdk.subagent.schema import AgentDefinition
	from openhands.tools.delegate import DelegationVisualizer
	from openhands.tools.file_editor import FileEditorTool
	from openhands.tools.task import TaskToolSet
	from openhands.tools.terminal import TerminalTool


	# Fallback model identifiers, used when the MAIN_MODEL / SIDEKICK_MODEL
	# environment variables are not set.
	DEFAULT_MAIN_MODEL = "openhands/gpt-5.5"
	DEFAULT_SIDEKICK_MODEL = "openhands/minimax-m2.7"


	# Protocol text given to the sidekick sub-agent as the content of a Skill
	# (registered with trigger=None). The RISK line uses plain `|` separators;
	# the markdown-escaped `\|` form was an extraction artifact that put
	# backslashes into the prompt and is an invalid string escape.
	SIDEKICK_SKILL = """
	You are a fast, low-cost sidekick agent. Your job is to help the main agent,
	not to complete the whole user request on your own.

	Rules:
	1. Prefer read-only investigation: inspect files, locate relevant code, propose
	small plans, and draft patch sketches. Do not edit files unless the prompt
	explicitly asks you to.
	2. Keep output compact and structured. Use at most three tool calls unless the
	prompt explicitly allows more. Prefer broad signals over exhaustive reading.
	3. If the task requires broad architecture decisions, many-file edits, unknown
	product judgement, security-sensitive changes, or you are not confident,
	stop and return exactly:

	ESCALATE_TO_MAIN: <short reason>

	4. Otherwise return:

	FINDINGS:
	- ...

	PROPOSED_NEXT_STEP:
	- ...

	RISK:
	- low|medium|high, with one sentence why
	""".strip()


	# Instructions passed to the main agent as its AgentContext
	# system_message_suffix: delegate independent areas to sidekicks in one
	# tool-call batch, then review their reports before acting.
	ORCHESTRATOR_SUFFIX = """
	You are the main high-capability agent in a fusion-style harness.

	Critical parallel-delegation protocol:
	- If the user lists multiple independent areas, your initial delegation step MUST
	be a single assistant response containing one `task` tool call per area, all
	with `subagent_type='sidekick'`.
	- Do not delegate only the first area and wait. Do not say you will launch
	several sidekicks and then call only one. Actually emit all sidekick task calls
	in the same tool-call batch so `tool_concurrency_limit` can run them in
	parallel.
	- Before the initial delegation batch, avoid direct terminal/file-editor work
	unless the user did not provide enough information to form sidekick prompts.
	- After all sidekick observations return, review them yourself, deduplicate, and
	make final prioritization with the main model.

	Good sidekick tasks:
	- locate relevant files
	- inspect test failures or logs
	- summarize a narrow subsystem
	- draft a small patch plan
	- check docs or dependency files

	Do not delegate broad design, final decisions, risky edits, or cross-cutting
	implementation. If any sidekick returns `ESCALATE_TO_MAIN`, stop delegating that
	thread and handle it yourself with the main model.

	Always review sidekick output before acting. Treat sidekick output as advisory,
	not authoritative. The final answer and any code changes are your responsibility.
	""".strip()


	def require_api_key() -> str:
	    """Return the API key taken from the environment.

	    ``LLM_API_KEY`` is consulted first, then ``OPENHANDS_API_KEY``. Values
	    are stripped of surrounding whitespace, so a blank or whitespace-only
	    variable counts as unset and the next variable is tried.

	    Returns:
	        The first non-blank key found, without surrounding whitespace.

	    Raises:
	        RuntimeError: If neither variable holds a non-blank value.
	    """
	    for variable in ("LLM_API_KEY", "OPENHANDS_API_KEY"):
	        candidate = os.getenv(variable, "").strip()
	        if candidate:
	            return candidate
	    raise RuntimeError(
	        "Set LLM_API_KEY, or OPENHANDS_API_KEY when using OpenHands-hosted models."
	    )


	def save_profile(
	    store: LLMProfileStore,
	    name: str,
	    usage_id: str,
	    model: str,
	    base_url: str | None,
	) -> None:
	    """Store an LLM profile under ``name`` in ``store``.

	    The profile is built from ``usage_id``, ``model`` and ``base_url`` only
	    and is saved with ``include_secrets=False``.
	    """
	    store.save(
	        name,
	        LLM(
	            usage_id=usage_id,
	            model=model,
	            base_url=base_url,
	        ),
	        include_secrets=False,
	    )


	def load_profile(store: LLMProfileStore, name: str, api_key: str) -> LLM:
	    """Load profile ``name`` from ``store`` and return a copy carrying ``api_key``.

	    The key is wrapped in ``SecretStr`` and applied through ``model_copy``.
	    """
	    return store.load(name).model_copy(update={"api_key": SecretStr(api_key)})


	def build_profiles(api_key: str) -> tuple[LLMProfileStore, LLM]:
	    """Create a profile store holding the main and sidekick LLM profiles.

	    Models come from ``MAIN_MODEL`` / ``SIDEKICK_MODEL`` (falling back to
	    the module defaults) and both profiles share ``LLM_BASE_URL``. The
	    store lives in a fresh temporary directory.

	    Returns:
	        The store, and the "fusion-main" profile loaded with ``api_key``.
	    """
	    base_url = os.getenv("LLM_BASE_URL")
	    main_model = os.getenv("MAIN_MODEL", DEFAULT_MAIN_MODEL)
	    sidekick_model = os.getenv("SIDEKICK_MODEL", DEFAULT_SIDEKICK_MODEL)
	    profile_dir = Path(tempfile.mkdtemp(prefix="openhands-fusion-profiles-"))
	    store = LLMProfileStore(base_dir=str(profile_dir))

	    save_profile(
	        store,
	        name="fusion-main",
	        usage_id="main",
	        model=main_model,
	        base_url=base_url,
	    )
	    save_profile(
	        store,
	        name="fusion-sidekick",
	        usage_id="sidekick",
	        model=sidekick_model,
	        base_url=base_url,
	    )

	    return store, load_profile(store, "fusion-main", api_key)


	def register_sidekick(store: LLMProfileStore, api_key: str) -> None:
	    """Register the "sidekick" sub-agent type.

	    The registered factory builds an agent from the "fusion-sidekick"
	    profile in ``store`` with terminal and file-editor tools and the
	    sidekick protocol skill. Per-run iteration and budget limits are read
	    from ``SIDEKICK_MAX_ITERATIONS`` and ``SIDEKICK_MAX_BUDGET``.
	    """

	    def create_sidekick(_: LLM) -> Agent:
	        # The LLM passed to the factory is ignored; the sidekick always
	        # uses its own stored profile.
	        return Agent(
	            llm=load_profile(store, "fusion-sidekick", api_key),
	            tools=[
	                Tool(name=TerminalTool.name),
	                Tool(name=FileEditorTool.name),
	            ],
	            tool_concurrency_limit=3,
	            agent_context=AgentContext(
	                skills=[
	                    Skill(
	                        name="fusion_sidekick_protocol",
	                        content=SIDEKICK_SKILL,
	                        trigger=None,
	                    )
	                ],
	                system_message_suffix="Stay within the sidekick protocol.",
	            ),
	        )

	    register_agent(
	        name="sidekick",
	        factory_func=create_sidekick,
	        description=AgentDefinition(
	            name="sidekick",
	            description=(
	                "Fast low-cost sub-agent for bounded investigation, patch "
	                "sketches, and escalation signals."
	            ),
	            max_iteration_per_run=int(os.getenv("SIDEKICK_MAX_ITERATIONS", "6")),
	            max_budget_per_run=float(os.getenv("SIDEKICK_MAX_BUDGET", "0.10")),
	        ),
	    )


	def build_main_agent(main_llm: LLM) -> Agent:
	    """Build the orchestrating agent around ``main_llm``.

	    It gets the task, terminal and file-editor tools, the orchestrator
	    system message suffix, and a tool concurrency limit read from
	    ``MAIN_TOOL_CONCURRENCY`` (default 8).
	    """
	    return Agent(
	        llm=main_llm,
	        tools=[
	            Tool(name=TaskToolSet.name),
	            Tool(name=TerminalTool.name),
	            Tool(name=FileEditorTool.name),
	        ],
	        tool_concurrency_limit=int(os.getenv("MAIN_TOOL_CONCURRENCY", "8")),
	        agent_context=AgentContext(system_message_suffix=ORCHESTRATOR_SUFFIX),
	    )


	def fusion_prompt(user_task: str) -> str:
	    """Wrap ``user_task`` in the fusion-workflow instructions for the main agent.

	    The task text is interpolated into a fixed template and the result is
	    returned with leading and trailing whitespace removed.
	    """
	    return f"""
	User task:
	{user_task}

	Run this in a fusion-style workflow.

	MANDATORY INITIAL DELEGATION RULE:
	If the user task contains multiple independent areas, directories, files,
	questions, or investigation threads, your next tool-using assistant response MUST
	contain one `task` tool call for every independent item, all in the same response,
	all with `subagent_type='sidekick'`. This is the core behavior being tested. Do
	not call only one task. Do not perform direct terminal/file_editor investigation
	first. Do not wait between sidekick launches.

	After that parallel task batch:
	1. Wait for all sidekick observations.
	2. Review the sidekick reports yourself.
	3. If any sidekick escalates or the work becomes complex, continue with the main
	model rather than switching models or restarting the conversation.
	4. Complete the task and summarize what was done.
	""".strip()


	def run(user_task: str, workspace: Path) -> None:
	    """Run ``user_task`` through the fusion harness in ``workspace``.

	    Sets up the profiles and the sidekick sub-agent, starts a conversation
	    with the main agent, sends the fusion prompt, runs the conversation,
	    and prints the combined accumulated cost.
	    """
	    api_key = require_api_key()
	    store, main_llm = build_profiles(api_key)
	    register_sidekick(store, api_key)

	    conversation = Conversation(
	        agent=build_main_agent(main_llm),
	        workspace=workspace,
	        visualizer=DelegationVisualizer(name="Fusion main"),
	        # Conversation state is persisted in a fresh temporary directory.
	        persistence_dir=Path(tempfile.mkdtemp(prefix="openhands-fusion-run-")),
	        max_iteration_per_run=int(os.getenv("MAIN_MAX_ITERATIONS", "40")),
	    )
	    conversation.send_message(fusion_prompt(user_task))
	    conversation.run()

	    metrics = conversation.conversation_stats.get_combined_metrics()
	    print(f"\nTotal estimated cost: ${metrics.accumulated_cost:.6f}")


	def main() -> None:
	    """Run the harness on the command-line arguments, or on a default task.

	    All arguments are joined with spaces into one task string; when that
	    is empty, a generic repository-analysis task is used. The current
	    working directory is the workspace.
	    """
	    user_task = " ".join(sys.argv[1:]).strip()
	    if not user_task:
	        user_task = "Analyze this repository and suggest the smallest useful improvement."

	    run(user_task=user_task, workspace=Path.cwd())


	# Script entry point: run the harness only when executed directly.
	if __name__ == "__main__":
	    main()
No results found