Skip to content

Instantly share code, notes, and snippets.

@neubig
Created June 29, 2026 23:28
Show Gist options
  • Select an option

  • Save neubig/412ab8df8e6fd0b2bdf10602d77f9d86 to your computer and use it in GitHub Desktop.

Select an option

Save neubig/412ab8df8e6fd0b2bdf10602d77f9d86 to your computer and use it in GitHub Desktop.
Fusion Harness: How to combine a more expensive main model and a sidekick model
"""Fusion-style delegation harness built with the OpenHands SDK.
Install:
uv pip install openhands-sdk openhands-tools
Run:
export LLM_API_KEY="..." # or export OPENHANDS_API_KEY="..."
export MAIN_MODEL="openhands/gpt-5.5"
export SIDEKICK_MODEL="openhands/minimax-m2.7"
uv run python fusion_harness_example.py "Find and fix the failing tests in this repo."
What this demonstrates:
- the main agent keeps one high-capability LLM profile for the whole task
- the cheap sidekick is registered as a sub-agent with its own LLM profile
- the main agent can issue several task-tool calls in one response
- tool_concurrency_limit lets those sidekick calls run concurrently
- sidekick returns ESCALATE_TO_MAIN when work exceeds its budget
"""
from __future__ import annotations
import os
import sys
import tempfile
from pathlib import Path
from pydantic import SecretStr
from openhands.sdk import Agent, AgentContext, Conversation, LLM, LLMProfileStore, Tool
from openhands.sdk.context import Skill
from openhands.sdk.subagent import register_agent
from openhands.sdk.subagent.schema import AgentDefinition
from openhands.tools.delegate import DelegationVisualizer
from openhands.tools.file_editor import FileEditorTool
from openhands.tools.task import TaskToolSet
from openhands.tools.terminal import TerminalTool
# Fallback model identifiers, used by build_profiles when the MAIN_MODEL /
# SIDEKICK_MODEL environment variables are not set.
DEFAULT_MAIN_MODEL = "openhands/gpt-5.5"
DEFAULT_SIDEKICK_MODEL = "openhands/minimax-m2.7"
# Protocol text attached to the sidekick sub-agent as a skill (see
# register_sidekick). It defines the sidekick's working rules and the two reply
# shapes the main agent is told to expect: an ESCALATE_TO_MAIN line, or a
# FINDINGS / PROPOSED_NEXT_STEP / RISK report.
SIDEKICK_SKILL = """
You are a fast, low-cost sidekick agent. Your job is to help the main agent,
not to complete the whole user request on your own.
Rules:
1. Prefer read-only investigation: inspect files, locate relevant code, propose
small plans, and draft patch sketches. Do not edit files unless the prompt
explicitly asks you to.
2. Keep output compact and structured. Use at most three tool calls unless the
prompt explicitly allows more. Prefer broad signals over exhaustive reading.
3. If the task requires broad architecture decisions, many-file edits, unknown
product judgement, security-sensitive changes, or you are not confident,
stop and return exactly:
ESCALATE_TO_MAIN: <short reason>
4. Otherwise return:
FINDINGS:
- ...
PROPOSED_NEXT_STEP:
- ...
RISK:
- low|medium|high, with one sentence why
""".strip()
# System-message suffix given to the main agent (see build_main_agent). It
# tells the main model to fan out one `task` call per independent area in a
# single response, what is and is not suitable for delegation, and how to react
# to an ESCALATE_TO_MAIN reply from a sidekick.
ORCHESTRATOR_SUFFIX = """
You are the main high-capability agent in a fusion-style harness.
Critical parallel-delegation protocol:
- If the user lists multiple independent areas, your initial delegation step MUST
be a single assistant response containing one `task` tool call per area, all
with `subagent_type='sidekick'`.
- Do not delegate only the first area and wait. Do not say you will launch
several sidekicks and then call only one. Actually emit all sidekick task calls
in the same tool-call batch so `tool_concurrency_limit` can run them in
parallel.
- Before the initial delegation batch, avoid direct terminal/file-editor work
unless the user did not provide enough information to form sidekick prompts.
- After all sidekick observations return, review them yourself, deduplicate, and
make final prioritization with the main model.
Good sidekick tasks:
- locate relevant files
- inspect test failures or logs
- summarize a narrow subsystem
- draft a small patch plan
- check docs or dependency files
Do not delegate broad design, final decisions, risky edits, or cross-cutting
implementation. If any sidekick returns `ESCALATE_TO_MAIN`, stop delegating that
thread and handle it yourself with the main model.
Always review sidekick output before acting. Treat sidekick output as advisory,
not authoritative. The final answer and any code changes are your responsibility.
""".strip()
def require_api_key() -> str:
    """Return the API key taken from the environment.

    ``LLM_API_KEY`` is checked first, then ``OPENHANDS_API_KEY``. Surrounding
    whitespace is stripped, and a variable that is unset, empty, or
    whitespace-only is skipped, so a blank ``LLM_API_KEY`` no longer hides a
    valid ``OPENHANDS_API_KEY``.

    Returns:
        The first non-blank key found, without surrounding whitespace.

    Raises:
        RuntimeError: If neither variable holds a non-blank value.
    """
    for variable in ("LLM_API_KEY", "OPENHANDS_API_KEY"):
        candidate = os.getenv(variable, "").strip()
        if candidate:
            return candidate
    raise RuntimeError(
        "Set LLM_API_KEY, or OPENHANDS_API_KEY when using OpenHands-hosted models."
    )
def save_profile(
    store: LLMProfileStore,
    name: str,
    usage_id: str,
    model: str,
    base_url: str | None,
) -> None:
    """Save an LLM profile called ``name`` into ``store``.

    The profile is an ``LLM`` built from ``usage_id``, ``model`` and
    ``base_url`` only. It is saved with ``include_secrets=False``; the API key
    is attached later, at load time.
    """
    profile = LLM(usage_id=usage_id, model=model, base_url=base_url)
    store.save(name, profile, include_secrets=False)
def load_profile(store: LLMProfileStore, name: str, api_key: str) -> LLM:
    """Load the profile ``name`` from ``store`` and return a copy with ``api_key`` set.

    The key is wrapped in ``SecretStr`` and applied through ``model_copy``, so
    the object returned by ``store.load`` is not modified.
    """
    stored_profile = store.load(name)
    overrides = {"api_key": SecretStr(api_key)}
    return stored_profile.model_copy(update=overrides)
def build_profiles(api_key: str) -> tuple[LLMProfileStore, LLM]:
    """Create a profile store holding the main and sidekick LLM profiles.

    Both profiles are written, without secrets, into a fresh temporary
    directory. The directory is not removed by this function; the returned
    store keeps pointing at it.

    Environment variables:
        LLM_BASE_URL: base URL stored in both profiles (``None`` when unset
            or empty).
        MAIN_MODEL: model for the ``fusion-main`` profile
            (falls back to ``DEFAULT_MAIN_MODEL``).
        SIDEKICK_MODEL: model for the ``fusion-sidekick`` profile
            (falls back to ``DEFAULT_SIDEKICK_MODEL``).

    Args:
        api_key: Key attached to the main LLM that is returned.

    Returns:
        ``(store, main_llm)`` where ``main_llm`` is the ``fusion-main``
        profile loaded with ``api_key`` applied.
    """
    # `or` instead of a getenv default: a variable exported as "" is still
    # "set", and would otherwise yield an empty model name / base URL.
    base_url = os.getenv("LLM_BASE_URL") or None
    main_model = os.getenv("MAIN_MODEL") or DEFAULT_MAIN_MODEL
    sidekick_model = os.getenv("SIDEKICK_MODEL") or DEFAULT_SIDEKICK_MODEL

    profile_dir = Path(tempfile.mkdtemp(prefix="openhands-fusion-profiles-"))
    store = LLMProfileStore(base_dir=str(profile_dir))

    profiles = (
        ("fusion-main", "main", main_model),
        ("fusion-sidekick", "sidekick", sidekick_model),
    )
    for profile_name, usage_id, model in profiles:
        save_profile(
            store,
            name=profile_name,
            usage_id=usage_id,
            model=model,
            base_url=base_url,
        )
    return store, load_profile(store, "fusion-main", api_key)
def register_sidekick(store: LLMProfileStore, api_key: str) -> None:
    """Register the ``sidekick`` sub-agent type.

    The registered factory builds an agent on the ``fusion-sidekick`` profile
    from ``store`` (with ``api_key`` applied), gives it the terminal and file
    editor tools, and attaches ``SIDEKICK_SKILL`` as a skill.

    Per-run limits come from the environment: ``SIDEKICK_MAX_ITERATIONS``
    (default ``"6"``, parsed with ``int``) and ``SIDEKICK_MAX_BUDGET``
    (default ``"0.10"``, parsed with ``float``).
    """

    def create_sidekick(_: LLM) -> Agent:
        # The LLM passed to the factory is ignored on purpose: the sidekick
        # always runs on its own stored profile.
        sidekick_llm = load_profile(store, "fusion-sidekick", api_key)
        sidekick_tools = [
            Tool(name=TerminalTool.name),
            Tool(name=FileEditorTool.name),
        ]
        protocol_skill = Skill(
            name="fusion_sidekick_protocol",
            content=SIDEKICK_SKILL,
            trigger=None,
        )
        context = AgentContext(
            skills=[protocol_skill],
            system_message_suffix="Stay within the sidekick protocol.",
        )
        return Agent(
            llm=sidekick_llm,
            tools=sidekick_tools,
            tool_concurrency_limit=3,
            agent_context=context,
        )

    definition = AgentDefinition(
        name="sidekick",
        description=(
            "Fast low-cost sub-agent for bounded investigation, "
            "patch sketches, and escalation signals."
        ),
        max_iteration_per_run=int(os.getenv("SIDEKICK_MAX_ITERATIONS", "6")),
        max_budget_per_run=float(os.getenv("SIDEKICK_MAX_BUDGET", "0.10")),
    )
    register_agent(
        name="sidekick",
        factory_func=create_sidekick,
        description=definition,
    )
def build_main_agent(main_llm: LLM) -> Agent:
    """Build the orchestrating agent that runs on ``main_llm``.

    The agent gets the task tool set (used for sidekick delegation), the
    terminal tool and the file editor tool, plus ``ORCHESTRATOR_SUFFIX`` as
    its system-message suffix. ``tool_concurrency_limit`` is read from
    ``MAIN_TOOL_CONCURRENCY`` (default ``"8"``, parsed with ``int``).
    """
    main_tools = [
        Tool(name=tool_name)
        for tool_name in (TaskToolSet.name, TerminalTool.name, FileEditorTool.name)
    ]
    concurrency_limit = int(os.getenv("MAIN_TOOL_CONCURRENCY", "8"))
    context = AgentContext(system_message_suffix=ORCHESTRATOR_SUFFIX)
    return Agent(
        llm=main_llm,
        tools=main_tools,
        tool_concurrency_limit=concurrency_limit,
        agent_context=context,
    )
def fusion_prompt(user_task: str) -> str:
    """Wrap ``user_task`` in the fixed fusion-workflow instructions.

    The task text is inserted verbatim on the line after ``User task:``, and
    leading/trailing whitespace of the finished prompt is stripped.
    """
    # Kept as a plain template (not an f-string) so the fixed wording is
    # separate from the single substitution point.
    template = """
User task:
{user_task}
Run this in a fusion-style workflow.
MANDATORY INITIAL DELEGATION RULE:
If the user task contains multiple independent areas, directories, files,
questions, or investigation threads, your next tool-using assistant response MUST
contain one `task` tool call for every independent item, all in the same response,
all with `subagent_type='sidekick'`. This is the core behavior being tested. Do
not call only one task. Do not perform direct terminal/file_editor investigation
first. Do not wait between sidekick launches.
After that parallel task batch:
1. Wait for all sidekick observations.
2. Review the sidekick reports yourself.
3. If any sidekick escalates or the work becomes complex, continue with the main
model rather than switching models or restarting the conversation.
4. Complete the task and summarize what was done.
"""
    prompt = template.format(user_task=user_task)
    return prompt.strip()
def run(user_task: str, workspace: Path) -> None:
    """Run one fusion conversation for ``user_task`` inside ``workspace``.

    Steps: read the API key, build both LLM profiles, register the sidekick
    sub-agent, start a conversation driven by the main agent, send the wrapped
    prompt, run the conversation, then print the combined estimated cost.

    Conversation state is persisted to a fresh temporary directory. The
    iteration cap comes from ``MAIN_MAX_ITERATIONS`` (default ``"40"``).
    """
    api_key = require_api_key()
    store, main_llm = build_profiles(api_key)
    register_sidekick(store, api_key)

    main_agent = build_main_agent(main_llm)
    visualizer = DelegationVisualizer(name="Fusion main")
    persistence_dir = Path(tempfile.mkdtemp(prefix="openhands-fusion-run-"))
    iteration_cap = int(os.getenv("MAIN_MAX_ITERATIONS", "40"))

    conversation = Conversation(
        agent=main_agent,
        workspace=workspace,
        visualizer=visualizer,
        persistence_dir=persistence_dir,
        max_iteration_per_run=iteration_cap,
    )
    prompt = fusion_prompt(user_task)
    conversation.send_message(prompt)
    conversation.run()

    metrics = conversation.conversation_stats.get_combined_metrics()
    print(f"\nTotal estimated cost: ${metrics.accumulated_cost:.6f}")
def main() -> None:
    """Command-line entry point.

    All command-line arguments are joined with spaces to form the task. When
    that is blank, a default repository-analysis task is used. The harness
    runs against the current working directory.
    """
    fallback_task = "Analyze this repository and suggest the smallest useful improvement."
    cli_task = " ".join(sys.argv[1:]).strip()
    run(user_task=cli_task or fallback_task, workspace=Path.cwd())
# Run the harness only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment