smokelore · April 12, 2026 14:30
diff --git a/index.html b/index.html
 <!DOCTYPE html>
 <html lang="en">
 <head>
 <!-- Document encoding and responsive viewport; the charset declaration is the first element inside head. -->
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 <title>darkfactory — Wiki</title>
 <!-- Third-party libraries served from jsDelivr. Both tags are plain blocking scripts (no defer or async),
      so they are fetched and executed in this order before the rest of the document is parsed.
      NOTE(review): presumably the inline script later in the page relies on marked and mermaid being
      available already; confirm before adding defer or async.
      NOTE(review): marked is pinned to 11.0.0 while mermaid is pinned only to major version 11;
      neither tag carries an integrity attribute. -->
 <script src="https://cdn.jsdelivr.net/npm/marked@11.0.0/marked.min.js"></script>
 <script src="https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.min.js"></script>
 <style>

 /* ==========================================================================
    Reset and design tokens
    ========================================================================== */

 * {
   margin: 0;
   padding: 0;
   box-sizing: border-box;
 }

 /* Custom property values are written without leading whitespace on purpose,
    so their serialized values stay exactly as before. */
 :root {
   --bg:#ffffff;
   --sidebar-bg:#f8f9fb;
   --border:#e5e7eb;
   --text:#1e293b;
   --text-muted:#64748b;
   --primary:#2563eb;
   --primary-soft:#eff6ff;
   --hover:#f1f5f9;
   --code-bg:#f1f5f9;
   --radius:8px;
   --shadow:0 1px 3px rgba(0,0,0,.08);
 }

 body {
   font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
   line-height: 1.65;
   color: var(--text);
   background: var(--bg);
 }

 /* ==========================================================================
    Page shell: fixed sidebar on the left, content column offset by its width
    ========================================================================== */

 .layout {
   display: flex;
   min-height: 100vh;
 }

 .sidebar {
   width: 280px;
   background: var(--sidebar-bg);
   border-right: 1px solid var(--border);
   position: fixed;
   top: 0;
   left: 0;
   bottom: 0;
   overflow-y: auto;
   padding: 24px 16px;
   display: flex;
   flex-direction: column;
   z-index: 10;
 }

 .content {
   margin-left: 280px;
   flex: 1;
   padding: 48px 64px;
   max-width: 960px;
 }

 /* ==========================================================================
    Sidebar: header, navigation tree, footer
    ========================================================================== */

 .sidebar-header {
   margin-bottom: 20px;
   padding-bottom: 16px;
   border-bottom: 1px solid var(--border);
 }

 .sidebar-title {
   font-size: 16px;
   font-weight: 700;
   color: var(--text);
   display: flex;
   align-items: center;
   gap: 8px;
 }

 .sidebar-title svg {
   flex-shrink: 0;
 }

 .sidebar-meta {
   font-size: 11px;
   color: var(--text-muted);
   margin-top: 6px;
 }

 .nav-section {
   margin-bottom: 2px;
 }

 /* Single-line entries; overflowing labels are truncated with an ellipsis. */
 .nav-item {
   display: block;
   padding: 7px 12px;
   border-radius: var(--radius);
   cursor: pointer;
   font-size: 13px;
   color: var(--text);
   text-decoration: none;
   transition: all .15s;
   white-space: nowrap;
   overflow: hidden;
   text-overflow: ellipsis;
 }

 .nav-item:hover {
   background: var(--hover);
 }

 .nav-item.active {
   background: var(--primary-soft);
   color: var(--primary);
   font-weight: 600;
 }

 .nav-item.overview {
   font-weight: 600;
   margin-bottom: 4px;
 }

 .nav-children {
   padding-left: 14px;
   border-left: 1px solid var(--border);
   margin-left: 12px;
 }

 .nav-group-label {
   font-size: 11px;
   font-weight: 600;
   color: var(--text-muted);
   text-transform: uppercase;
   letter-spacing: .5px;
   padding: 12px 12px 4px;
   user-select: none;
 }

 /* margin-top:auto pushes the footer to the bottom of the flex column. */
 .sidebar-footer {
   margin-top: auto;
   padding-top: 16px;
   border-top: 1px solid var(--border);
   font-size: 11px;
   color: var(--text-muted);
   text-align: center;
 }

 /* ==========================================================================
    Content typography
    ========================================================================== */

 .content h1 {
   font-size: 28px;
   font-weight: 700;
   margin-bottom: 8px;
   line-height: 1.3;
 }

 .content h2 {
   font-size: 22px;
   font-weight: 600;
   margin: 32px 0 12px;
   padding-bottom: 6px;
   border-bottom: 1px solid var(--border);
 }

 .content h3 {
   font-size: 17px;
   font-weight: 600;
   margin: 24px 0 8px;
 }

 .content h4 {
   font-size: 15px;
   font-weight: 600;
   margin: 20px 0 6px;
 }

 .content p {
   margin: 12px 0;
 }

 .content ul,
 .content ol {
   margin: 12px 0 12px 24px;
 }

 .content li {
   margin: 4px 0;
 }

 .content a {
   color: var(--primary);
   text-decoration: none;
 }

 .content a:hover {
   text-decoration: underline;
 }

 .content blockquote {
   border-left: 3px solid var(--primary);
   padding: 8px 16px;
   margin: 16px 0;
   background: var(--primary-soft);
   border-radius: 0 var(--radius) var(--radius) 0;
   color: var(--text-muted);
   font-size: 14px;
 }

 /* Inline code gets a tinted chip; code inside pre resets that chip below. */
 .content code {
   font-family: 'SF Mono', Consolas, 'Courier New', monospace;
   font-size: 13px;
   background: var(--code-bg);
   padding: 2px 6px;
   border-radius: 4px;
 }

 .content pre {
   background: #1e293b;
   color: #e2e8f0;
   border-radius: var(--radius);
   padding: 16px;
   overflow-x: auto;
   margin: 16px 0;
 }

 .content pre code {
   background: none;
   padding: 0;
   font-size: 13px;
   line-height: 1.6;
   color: inherit;
 }

 .content table {
   border-collapse: collapse;
   width: 100%;
   margin: 16px 0;
 }

 .content th,
 .content td {
   border: 1px solid var(--border);
   padding: 8px 12px;
   text-align: left;
   font-size: 14px;
 }

 .content th {
   background: var(--sidebar-bg);
   font-weight: 600;
 }

 .content img {
   max-width: 100%;
   border-radius: var(--radius);
 }

 .content hr {
   border: none;
   border-top: 1px solid var(--border);
   margin: 32px 0;
 }

 .content .mermaid {
   margin: 20px 0;
   text-align: center;
 }

 /* ==========================================================================
    Narrow viewports: off-canvas sidebar revealed through the toggle button
    ========================================================================== */

 /* Hidden by default; the media query below switches it on. */
 .menu-toggle {
   display: none;
   position: fixed;
   top: 12px;
   left: 12px;
   z-index: 20;
   background: var(--bg);
   border: 1px solid var(--border);
   border-radius: var(--radius);
   padding: 8px 12px;
   cursor: pointer;
   font-size: 18px;
   box-shadow: var(--shadow);
 }

 @media (max-width: 768px) {
   /* Sidebar slides fully off-screen until it carries the .open class. */
   .sidebar {
     transform: translateX(-100%);
     transition: transform .2s;
   }

   .sidebar.open {
     transform: translateX(0);
     box-shadow: 2px 0 12px rgba(0,0,0,.1);
   }

   /* padding-top comes after the shorthand so it overrides the 24px top value. */
   .content {
     margin-left: 0;
     padding: 24px 20px;
     padding-top: 56px;
   }

   .menu-toggle {
     display: block;
   }
 }

 /* ==========================================================================
    Placeholder shown inside the content area
    ========================================================================== */

 .empty-state {
   text-align: center;
   padding: 80px 20px;
   color: var(--text-muted);
 }

 /* Declared after ".content h2" with equal specificity, so these values win. */
 .empty-state h2 {
   font-size: 20px;
   margin-bottom: 8px;
   border: none;
 }

 </style>
 </head>
 <body>
 <!-- Sidebar toggle button. The stylesheet hides it by default and shows it at viewport widths of 768px or less.
      type="button" states explicitly that this is an in-page action, not a form submit.
      aria-controls names the sidebar element; presumably a script not shown here toggles the
      "open" class on that element (confirm against the page script). -->
 <button class="menu-toggle" id="menu-toggle" type="button" aria-controls="sidebar" aria-label="Toggle menu">&#9776;</button>
 <!-- Page shell: fixed sidebar navigation plus the main content column. -->
 <div class="layout">
 <!-- Navigation landmark; aria-label gives it an accessible name for assistive technology. -->
 <nav class="sidebar" id="sidebar" aria-label="Wiki pages">
 <div class="sidebar-header">
 <div class="sidebar-title">
 <!-- Decorative book icon: the text next to it already names the site, so the icon is hidden from assistive technology. -->
 <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true" focusable="false"><path d="M2 3h6a4 4 0 014 4v14a3 3 0 00-3-3H2z"/><path d="M22 3h-6a4 4 0 00-4 4v14a3 3 0 013-3h7z"/></svg>
 darkfactory
 </div>
 <!-- Empty in the static markup; presumably filled in by the page script (not visible here). -->
 <div class="sidebar-meta" id="meta-info"></div>
 </div>
 <!-- Empty in the static markup; presumably the navigation tree is rendered here by the page script. -->
 <div id="nav-tree"></div>
 <div class="sidebar-footer">Generated by GitNexus</div>
 </nav>
 <!-- Main content area; ships with a loading placeholder as its initial content. -->
 <main class="content" id="content">
 <div class="empty-state"><h2>Loading…</h2></div>
 </main>
 </div>
 <script>
 var PAGES = {"agent-runtime-and-tooling-integration":"# Agent Runtime and Tooling Integration\n\n# Agent Runtime and Tooling Integration\n\nThis module is the boundary between DarkFactory’s role-based agent configuration and the underlying DeepAgents/LangChain runtime. It is responsible for:\n\n- defining built-in agent roles and their defaults\n- constructing agent runtimes from `AgentConfig`\n- resolving backends and tools for each role\n- enforcing runtime restrictions for bounded/read-only roles\n- invoking nested agents through a stable message contract\n- adapting DarkFactory-owned tools into LangChain-compatible tools\n\nAt a high level, the module turns a role like `\"planner\"` or `\"coder\"` into a runnable DeepAgent graph with:\n\n- a concrete backend (`filesystem` or `shell`)\n- a system prompt loaded from workspace prompt templates\n- optional repository instruction context appended to the prompt\n- a scoped set of tools with permission ceilings\n- middleware that removes orchestration capabilities for restricted roles\n\n---\n\n## Public Surface\n\n`api/agents/__init__.py` re-exports the main integration points:\n\n- `AgentRegistry`\n- `build_default_agent_registry()`\n- `register_default_agents()`\n- `create_agent()`\n- `create_skill_agent()`\n- `load_prompt()`\n- `resolve_tools()`\n- `invoke_agent()`\n- `extract_text()`\n- `extract_structured()`\n- `ReadOnlyBackend`\n- `resolve_backend()`\n- built-in configs:\n  - `PLANNER_AGENT_CONFIG`\n  - `REVIEWER_AGENT_CONFIG`\n  - `REVISER_AGENT_CONFIG`\n  - `CODER_AGENT_CONFIG`\n  - `CODE_REVIEWER_AGENT_CONFIG`\n\n`api/tools/__init__.py` similarly exposes the tool-side registry and git tool helpers.\n\n---\n\n## Architecture\n\n```mermaid\nflowchart TD\n    A[AgentConfig / role name] --> B[create_agent or create_skill_agent]\n    B --> C[load_prompt + compose_system_prompt]\n    B --> D[resolve_backend]\n    B --> E[resolve_tools]\n    E --> F[ToolRegistry.build_tools]\n    F --> 
G[ToolAdapter.with_permission]\n    B --> H[build_bounded_agent_middleware]\n    C --> I[create_deep_agent]\n    D --> I\n    E --> I\n    H --> I\n```\n\nThis is the core assembly path. The factory gathers prompt, backend, tools, and middleware, then passes them into `create_deep_agent(...)`.\n\n---\n\n## Agent Role Registry\n\nDefined in `api/agents/registry.py`.\n\n### Built-in role configs\n\nThe module ships with five default `AgentConfig` instances:\n\n- `PLANNER_AGENT_CONFIG`\n- `REVIEWER_AGENT_CONFIG`\n- `REVISER_AGENT_CONFIG`\n- `CODER_AGENT_CONFIG`\n- `CODE_REVIEWER_AGENT_CONFIG`\n\nThese encode the role’s:\n\n- `name`\n- `model`\n- `prompt_template`\n- `tools`\n- `tool_permissions`\n- `backend`\n\nExamples:\n\n- `planner`, `reviewer`, `reviser`, and `code_reviewer` use `backend=\"filesystem\"`\n- `coder` uses `backend=\"shell\"`\n- all built-in roles currently declare the `git` tool with `\"read\"` permission\n\n### `AgentRegistry`\n\n`AgentRegistry` is a simple mutable name-to-config map.\n\nMethods:\n\n- `register(config: AgentConfig) -> None`\n- `get(name: str) -> AgentConfig`\n- `list_agents() -> list[str]`\n- `reload(configs: Mapping[str, AgentConfig]) -> None`\n\nBehavior notes:\n\n- `get()` raises `AgentNotFoundError` if the role is missing\n- `reload()` replaces the entire registry snapshot, rather than merging\n\n### Default registry helpers\n\n- `register_default_agents(registry: AgentRegistry) -> None`\n- `build_default_agent_registry() -> AgentRegistry`\n\nThese are used when no explicit registry is supplied to the factory layer.\n\n### How the rest of the system uses it\n\nThe execution flows show that planning/task graph construction resolves role configs through:\n\n- `resolve_registered_agent_config()`\n- `build_default_agent_registry()`\n- `AgentRegistry.get()`\n\nThat means registry contents directly affect which roles can be materialized during graph execution.\n\n---\n\n## Agent Factory\n\nDefined in `api/agents/factory.py`.\n\nThis 
file is the main runtime assembly seam.\n\n## Core entry points\n\n### `create_agent(...)`\n\n```python\ncreate_agent(\n    config,\n    clone_path,\n    *,\n    workspace_root=None,\n    repo_context=None,\n    response_format=None,\n    tool_registry=None,\n) -> CompiledStateGraph\n```\n\nCreates a DeepAgent runtime for a normal role.\n\n### `create_skill_agent(...)`\n\n```python\ncreate_skill_agent(\n    config,\n    clone_path,\n    *,\n    workspace_root=None,\n    repo_context=None,\n    response_format=None,\n    tool_registry=None,\n    skills=None,\n    backend_override=None,\n    tool_adapters_override=None,\n) -> CompiledStateGraph\n```\n\nCreates a nested/skill-backed agent through the same shared assembly path, but allows additional overrides:\n\n- `skills`: forwarded to DeepAgents\n- `backend_override`: bypasses normal backend resolution\n- `tool_adapters_override`: bypasses registry-based tool resolution\n\nBoth functions delegate to `_create_agent(...)`.\n\n---\n\n## `_create_agent(...)` execution flow\n\n`_create_agent(...)` performs the full assembly:\n\n1. loads settings with `get_settings()`\n2. publishes model API keys via `publish_model_api_keys(settings)`\n3. resolves `workspace_root`\n4. loads the prompt template with `load_prompt(...)`\n5. composes the final system prompt with `compose_system_prompt(...)`\n6. resolves the backend with `resolve_backend(...)` unless overridden\n7. resolves tools with `resolve_tools(...)` unless overridden\n8. computes middleware with `build_bounded_agent_middleware(...)`\n9. 
forwards everything into `create_deep_agent(...)`\n\nThe resulting object is a `CompiledStateGraph`.\n\n### Arguments passed to `create_deep_agent(...)`\n\nThe factory builds `agent_kwargs` with:\n\n- `model=config.model`\n- `tools=tools`\n- `backend=backend`\n- `system_prompt=prompt`\n- `name=config.name`\n\nConditionally added:\n\n- `middleware=[middleware]` if middleware exists\n- `skills=list(skills)` if provided\n- `response_format=response_format` if provided\n\nThis is the single place where DarkFactory-specific role configuration becomes a DeepAgents runtime.\n\n---\n\n## Prompt Loading and Repository Context\n\n### `load_prompt(workspace_root: Path, prompt_template: str) -> str`\n\nLoads prompt templates from:\n\n```python\n(workspace_root / \"prompts\")\n```\n\nSecurity behavior:\n\n- resolves both prompt root and target path\n- rejects path traversal by checking `prompt_path.relative_to(prompt_root)`\n- raises `AgentError` if the template escapes the prompt directory\n- raises `AgentError` if the file does not exist\n\nThis prevents prompt template names from reading arbitrary files outside the workspace prompt directory.\n\n### `compose_system_prompt(...)`\n\n```python\ncompose_system_prompt(\n    workspace_root,\n    role_name,\n    base_prompt,\n    repo_context=None,\n) -> str\n```\n\nCurrent behavior:\n\n- trims trailing whitespace from `base_prompt`\n- if `repo_context` is absent or has no `instruction_files`, returns the base prompt unchanged\n- otherwise appends formatted repository context\n\n`workspace_root` and `role_name` are currently unused in the implementation.\n\n### `format_repo_context(repo_context: RepositoryContext) -> str`\n\nRenders repository instruction files into an advisory prompt section.\n\nThe injected section starts with:\n\n- `REPO_CONTEXT_HEADER = \"## Repository Context\"`\n- `REPO_CONTEXT_PREAMBLE`, which explicitly says repository instructions are advisory and must not override role prompts, task constraints, or 
service-owned policies\n\nEach discovered instruction file is rendered as:\n\n```markdown\n### `relative/path`\n<file content>\n```\n\nThis is important: repository instructions are intentionally framed as contextual guidance, not authoritative policy.\n\n---\n\n## Backend Resolution\n\nDefined in `api/agents/backends.py`.\n\n## `resolve_backend(...)`\n\n```python\nresolve_backend(\n    backend_type: AgentBackendType,\n    clone_path: str | Path,\n) -> BackendProtocol\n```\n\nMaps an `AgentConfig.backend` value to a concrete DeepAgents backend.\n\nSupported values:\n\n- `\"filesystem\"` → `ReadOnlyBackend(FilesystemBackend(...))`\n- `\"shell\"` → `LocalShellBackend(...)`\n\nBoth backends are rooted at the resolved `clone_path` and use `virtual_mode=True`.\n\nFor shell backends:\n\n- `inherit_env=True` is enabled on `LocalShellBackend`\n\nUnknown backend types raise `AgentError`.\n\n### Why filesystem roles are wrapped\n\nFor `\"filesystem\"` roles, the factory does not expose a raw `FilesystemBackend`. 
It wraps it in `ReadOnlyBackend`, which blocks mutation APIs while preserving read/search/list operations.\n\nThis is the first layer of runtime restriction for bounded roles.\n\n---\n\n## `ReadOnlyBackend`\n\n`ReadOnlyBackend` implements `BackendProtocol` and delegates all read-only operations to an inner backend:\n\n- `ls_info`\n- `read`\n- `grep_raw`\n- `glob_info`\n- `download_files`\n- async equivalents:\n  - `als_info`\n  - `aread`\n  - `agrep_raw`\n  - `aglob_info`\n  - `adownload_files`\n\nMutation operations are blocked:\n\n- `write()` / `awrite()` return `WriteResult(error=READ_ONLY_WRITE_ERROR)`\n- `edit()` / `aedit()` return `EditResult(error=READ_ONLY_EDIT_ERROR)`\n- `upload_files()` returns `FileUploadResponse(..., error=\"permission_denied\")` for each file\n- `aupload_files()` delegates to `upload_files()`\n\nThis design is intentionally non-throwing for blocked writes/edits/uploads: callers receive structured failure results instead of exceptions.\n\n### Error constants\n\n- `READ_ONLY_WRITE_ERROR = \"Write operations are not permitted for this agent role.\"`\n- `READ_ONLY_EDIT_ERROR = \"Edit operations are not permitted for this agent role.\"`\n\n---\n\n## Tool Resolution and Permission Scoping\n\nTool integration spans:\n\n- `api/agents/factory.py`\n- `api/tools/registry.py`\n- `api/tools/base.py`\n- `api/tools/git.py`\n\nThe design separates three concerns:\n\n1. **which tools a role declares** — `AgentConfig.tools`\n2. **how a tool is materialized** — `ToolRegistry` / `ToolRegistration`\n3. 
**what permission ceiling applies at runtime** — `ToolAdapter.with_permission(...)`\n\n---\n\n## Tool registry\n\nDefined in `api/tools/registry.py`.\n\n### `ToolRegistration`\n\nA `ToolRegistration` binds:\n\n- `config: ToolConfig`\n- `materialize: ToolMaterializer`\n\n`materialize` receives a `ToolBuildContext` and returns a `ToolAdapter`.\n\n### `ToolBuildContext`\n\nContains the agent-scoped inputs needed to build a runtime tool:\n\n- `clone_path: Path`\n- `agent_config: AgentConfig`\n- `registry: ToolRegistry`\n\nThis lets a materializer build tools that depend on the current clone or role.\n\n### `ToolRegistry`\n\nA mutable name-to-registration map with:\n\n- `register(tool: ToolRegistration) -> None`\n- `update(tools: Iterable[ToolRegistration]) -> None`\n- `unregister(name: str) -> None`\n- `get(name: str) -> ToolRegistration`\n- `list_tools() -> list[str]`\n\n`get()` raises `ToolNotFoundError` if the tool is missing.\n\n### Default tool registry\n\n- `register_default_tools(registry: ToolRegistry) -> None`\n- `build_default_tool_registry() -> ToolRegistry`\n\nCurrently, the default registry registers only one tool:\n\n- `git`\n\nThe registration materializes a clone-scoped adapter via:\n\n```python\nlambda build_context: create_git_tool_adapter(build_context.clone_path)\n```\n\n---\n\n## `build_tools(...)`\n\n```python\nbuild_tools(\n    config: AgentConfig,\n    registry: ToolRegistry,\n    clone_path: str | Path,\n) -> list[ToolAdapter]\n```\n\nThis function resolves the tools declared by an agent config and scopes each one to the role’s permission ceiling.\n\nFor each `tool_name` in `config.tools`:\n\n1. fetch registration with `registry.get(tool_name)`\n2. read the role’s max level from `config.tool_permissions[tool_name]`\n3. materialize the adapter with `registration.build(build_context)`\n4. 
apply `with_permission(max_level)`\n\nThe result is a list of `ToolAdapter` instances already constrained for that role.\n\nThis permission-scoping step is central: even if a tool supports broader permissions, the role only receives the minimum of:\n\n- the adapter’s own maximum declared capability\n- the role’s configured permission ceiling\n\n---\n\n## `resolve_tools(...)`\n\nDefined in `api/agents/factory.py`.\n\n```python\nresolve_tools(\n    config,\n    clone_path,\n    *,\n    tool_registry=None,\n    tool_adapters_override=None,\n) -> Sequence[BaseTool]\n```\n\nBehavior:\n\n- if `tool_adapters_override` is provided, it is used directly and converted with `adapter.to_langchain_tool()`\n- if `config.tools` is empty, returns `[]`\n- otherwise:\n  - uses the provided `tool_registry` or `build_default_tool_registry()`\n  - calls `build_tools(config, registry, clone_path)`\n  - converts each `ToolAdapter` to a LangChain tool\n\nThis is the point where DarkFactory-owned tool adapters become runtime tools consumable by DeepAgents.\n\n---\n\n## ToolAdapter contract\n\nDefined in `api/tools/base.py`.\n\n`ToolAdapter` is the runtime abstraction that DarkFactory owns. 
It wraps either:\n\n- an existing LangChain `BaseTool` (`inner`)\n- or a custom async executor (`executor`)\n\nAt least one must be provided.\n\n### Constructor\n\n```python\nToolAdapter(\n    config: ToolConfig,\n    inner: BaseTool | None = None,\n    executor: ToolExecutor | None = None,\n    *,\n    max_permission_level: AgentToolPermission | None = None,\n)\n```\n\nIf `max_permission_level` is omitted, the adapter computes it from `config.permissions` using `_highest_declared_permission()`.\n\n### `execute(...)`\n\n```python\nasync def execute(self, **kwargs: object) -> str\n```\n\nThis is the permission-enforcing execution boundary.\n\nSpecial behavior:\n\n- checks for `_df_permission_required` in `kwargs`\n- if present, normalizes it with `normalize_permission_level(...)`\n- compares it against the adapter ceiling with `permission_allows(...)`\n- if denied, returns a serialized JSON payload with:\n  - `status=\"denied\"`\n  - `error=False`\n  - `required`\n  - `allowed`\n  - explanatory `message`\n\nExecution behavior:\n\n- if `_executor` exists, calls it directly\n- otherwise invokes the wrapped LangChain tool via `_invoke_inner_tool(...)`\n- exceptions are caught and returned as serialized JSON with:\n  - `status=\"error\"`\n  - `error=True`\n  - `message=str(exc)`\n\nNotably, adapter execution failures are contained and converted into string payloads rather than propagated as exceptions.\n\n### `with_permission(...)`\n\nReturns a new `ToolAdapter` with a reduced permission ceiling.\n\nIt computes the effective level using `_min_permission_level(...)`, which chooses the more restrictive of:\n\n- the adapter’s current ceiling\n- the requested ceiling\n\nThis prevents permission escalation when tools are reused across roles.\n\n### `to_langchain_tool()`\n\nConverts the adapter into a `StructuredTool` using:\n\n- a sync wrapper that calls `asyncio.run(self.execute(...))`\n- an async wrapper that awaits `self.execute(...)`\n\nThe resulting tool uses:\n\n- 
`name=self.config.name`\n- `description=self.config.description`\n- `args_schema=self._resolve_args_schema()`\n\n### `from_langchain_tool(...)`\n\nWraps an existing LangChain tool behind the adapter contract.\n\n### Output coercion\n\n`_coerce_output(...)` normalizes tool results to strings:\n\n- `str` → unchanged\n- `None` → `\"\"`\n- `dict` / `list` → JSON string\n- anything else → `str(result)`\n\nThis keeps the runtime contract simple for agent consumption.\n\n---\n\n## Built-in Git Tool\n\nDefined in `api/tools/git.py`.\n\nThis is the only built-in DarkFactory-owned tool currently registered by default.\n\n## Purpose\n\nThe git tool provides read-only repository inspection inside the task clone.\n\nAllowed subcommands:\n\n- `\"log\"`\n- `\"diff\"`\n- `\"show\"`\n\nThese are encoded by:\n\n```python\nGitReadCommand = Literal[\"log\", \"diff\", \"show\"]\n```\n\n### Tool metadata\n\n- `GIT_TOOL_DESCRIPTION`\n- `GIT_TOOL_CONFIG`\n\n`GIT_TOOL_CONFIG` declares one permission:\n\n- `ToolPermission(name=\"git\", level=\"read\")`\n\n### `create_git_tool(repo_path)`\n\nBuilds a `StructuredTool` named `\"git\"` scoped to the resolved repository path.\n\nIt exposes both sync and async execution paths, both backed by `_run_git_read_command(...)`.\n\n### `create_git_tool_adapter(repo_path)`\n\nWraps the structured tool with:\n\n```python\nToolAdapter.from_langchain_tool(create_git_tool(repo_path), GIT_TOOL_CONFIG)\n```\n\nThis is what the default tool registry materializes.\n\n---\n\n## Git command execution details\n\n### `_run_git_read_command(...)`\n\nThis function performs the actual subprocess call.\n\nBehavior:\n\n1. verifies `repo_path.exists()`\n2. parses `arguments` with `shlex.split(...)`\n3. validates repo-relative paths with `_validate_repo_relative_paths(...)`\n4. sets pager-related env vars:\n   - `GIT_PAGER=cat`\n   - `PAGER=cat`\n5. 
runs:\n\n```bash\ngit --no-pager <command> <arguments...>\n```\n\nwith:\n\n- `cwd=repo_path`\n- `capture_output=True`\n- `text=True`\n- `check=False`\n\n### Expected diff exit handling\n\nGit `diff` commonly exits with status `1` when differences are found. That is not treated as an error here.\n\n`_is_expected_diff_exit(...)` recognizes:\n\n- `command == \"diff\"`\n- `returncode == 1`\n- empty stderr\n\nThen `_format_expected_diff_result(...)` returns either:\n\n- the diff output, or\n- a fallback explanatory message\n\n### Invalid revision guidance\n\nIf stderr matches known invalid revision markers, `_is_invalid_revision_error(...)` returns true and the tool returns a user-guiding message instead of raising.\n\nThis message explicitly advises against guessing refs like `HEAD~N` and suggests inspecting history first.\n\n### Path validation\n\n`_validate_repo_relative_paths(...)` only checks path arguments after `--`.\n\nRejected cases:\n\n- absolute paths like `/api`\n- parent-relative paths containing `..`\n\nBoth raise `AgentError`.\n\nThis keeps git path queries inside the task clone root.\n\n---\n\n## Runtime Middleware for Bounded Roles\n\nDefined in `api/agents/middleware.py`.\n\n`DisableSubagentToolMiddleware` is DarkFactory-owned middleware that modifies model requests before they reach the model.\n\n## Purpose\n\nIt restricts tool access for roles that should not orchestrate subagents or write todos, even if the underlying runtime might otherwise expose those capabilities.\n\nThis is the second layer of restriction after backend selection.\n\n### Constructor\n\n```python\nDisableSubagentToolMiddleware(\n    disabled_tool_names: Sequence[str] = (),\n    *,\n    allowed_tool_names: Sequence[str] | None = None,\n)\n```\n\nModes:\n\n- blacklist mode via `disabled_tool_names`\n- allowlist mode via `allowed_tool_names`\n\n### Request interception\n\nBoth:\n\n- `wrap_model_call(...)`\n- `awrap_model_call(...)`\n\ndelegate to `_override_request(...)`.\n\n### 
`_override_request(...)`\n\nReturns `request.override(...)` with:\n\n- filtered tools from `_filter_tools(...)`\n- cleaned system prompt from `_strip_built_in_prompts(...)`\n\n### `_filter_tools(...)`\n\nRemoves tools whose names are disabled, then optionally applies an allowlist.\n\nTool names are extracted by `_tool_name(...)`, which supports both:\n\n- `BaseTool`\n- dict-shaped tool descriptors\n\n### `_strip_built_in_prompts(...)`\n\nRemoves built-in orchestration prompt fragments from the system message if present.\n\nIt searches for:\n\n- `WRITE_TODOS_SYSTEM_PROMPT`\n- `TASK_SYSTEM_PROMPT`\n\nIf either appears, everything from the first occurrence onward is stripped.\n\nThis matters because simply removing tools is not enough; the model should also stop receiving built-in instructions that encourage orchestration behavior.\n\n---\n\n## Middleware selection by role\n\nDefined in `build_bounded_agent_middleware(config)` in `api/agents/factory.py`.\n\nBehavior:\n\n- if `config.backend != \"filesystem\"` → no middleware\n- if `config.name` is in `PROMPT_ONLY_AGENT_ROLES` (`reviewer`, `reviser`, `code_reviewer`) → return `DisableSubagentToolMiddleware(allowed_tool_names=[])`\n- if `config.name` is in `DISABLE_ORCHESTRATION_TOOL_ROLES` (`planner`) → return `DisableSubagentToolMiddleware([\"task\", \"write_todos\"])`\n- otherwise → no middleware\n\nInterpretation:\n\n- prompt-only roles get **no runtime tools at all**\n- planner keeps its declared tools (for example `git`) but loses orchestration tools like `task` and `write_todos`\n- shell-backed roles like `coder` are not filtered by this middleware\n\nThis role-specific middleware is attached in `_create_agent(...)` only when needed.\n\n---\n\n## Agent Invocation Helpers\n\nDefined in `api/agents/invoke.py`.\n\nThese helpers standardize how nested agents are called and how their outputs are read.\n\n## `AgentRunnable`\n\nA minimal protocol requiring:\n\n```python\nasync def ainvoke(\n    input: dict[str, 
object],\n    config: dict[str, object] | None = None,\n) -> dict[str, object]\n```\n\nThis keeps the invocation helper decoupled from a concrete DeepAgents class.\n\n## `invoke_agent(...)`\n\n```python\nasync def invoke_agent(\n    agent: AgentRunnable,\n    prompt: str,\n    thread_id: str,\n    *,\n    agent_name: str | None = None,\n) -> dict[str, object]\n```\n\nBehavior:\n\n- creates a logging context with `thread_id` and optional `agent`\n- logs start and completion timing\n- invokes the agent with a standard message payload:\n\n```python\n{\"messages\": [HumanMessage(content=prompt)]}\n```\n\n- passes thread identity through runtime config:\n\n```python\nconfig={\"configurable\": {\"thread_id\": thread_id}}\n```\n\n- on exception:\n  - logs failure with elapsed time\n  - re-raises the exception\n\nThis helper is the standard nested-agent invocation contract.\n\n### Why `thread_id` is passed twice\n\nIt appears in:\n\n- logging context\n- `configurable.thread_id` for the runtime\n\nThat keeps observability and runtime state aligned.\n\n---\n\n## Response extraction helpers\n\n### `extract_text(result) -> str`\n\nReads the final assistant text from a DeepAgents response payload.\n\nExpected shape:\n\n- `result[\"messages\"]` must be a non-empty list\n- the last message must contain extractable text\n\nSupported content forms:\n\n- plain string content\n- list content containing dict parts with `{\"type\": \"text\", \"text\": ...}`\n\nIf no text can be extracted, raises `AgentError`.\n\n### `extract_structured(result) -> BaseModel`\n\nReads `result[\"structured_response\"]` and requires it to be a `pydantic.BaseModel`.\n\nIf missing or invalid, raises `AgentError`.\n\nUse this when the agent was created with `response_format=...`.\n\n---\n\n## Registered Agent Creation Helpers\n\nAlso in `api/agents/factory.py`.\n\n### `resolve_registered_agent_config(...)`\n\n```python\nresolve_registered_agent_config(\n    role_name: str,\n    *,\n    agent_registry: 
AgentRegistry | None = None,\n) -> AgentConfig\n```\n\nUses the provided registry or `build_default_agent_registry()` and returns `registry.get(role_name)`.\n\n### `create_registered_agent(...)`\n\n```python\ncreate_registered_agent(\n    role_name: str,\n    clone_path: str | Path,\n    *,\n    workspace_root: Path | None = None,\n    repo_context: RepositoryContext | None = None,\n    response_format: type[ResponseModelT] | None = None,\n    agent_registry: AgentRegistry | None = None,\n    tool_registry: ToolRegistry | None = None,\n) -> CompiledStateGraph\n```\n\nConvenience wrapper that resolves the role config first, then delegates to `create_agent(...)`.\n\nThis is the easiest entry point when callers know the role name but not the full `AgentConfig`.\n\n---\n\n## How restrictions are layered\n\nThe module enforces role boundaries in multiple places, not just one.\n\n## 1. Backend-level restriction\n\n`resolve_backend(\"filesystem\", ...)` returns `ReadOnlyBackend(...)`, which blocks writes, edits, and uploads.\n\n## 2. Tool-level restriction\n\n`build_tools(...)` applies `ToolAdapter.with_permission(...)` using `config.tool_permissions`.\n\n## 3. Middleware-level restriction\n\n`build_bounded_agent_middleware(...)` removes orchestration tools and strips orchestration prompts for bounded roles.\n\n## 4. Prompt-level framing\n\n`format_repo_context(...)` explicitly marks repository instructions as advisory only.\n\nThese layers are complementary:\n\n- backend restriction prevents file mutation\n- tool restriction prevents permission escalation\n- middleware restriction prevents orchestration behavior\n- prompt framing prevents repository instructions from overriding service policy\n\n---\n\n## Extension points\n\n## Adding a new agent role\n\n1. create a new `AgentConfig`\n2. register it in an `AgentRegistry`\n3. optionally include it in `DEFAULT_AGENT_CONFIGS`\n4. add a prompt template under `<workspace_root>/prompts/`\n5. 
decide whether `build_bounded_agent_middleware(...)` should restrict it\n\nIf the role uses `backend=\"filesystem\"`, it will automatically get a read-only backend.\n\n## Adding a new tool\n\n1. define a `ToolConfig`\n2. implement a materializer returning a `ToolAdapter`\n3. register it as a `ToolRegistration` in `ToolRegistry`\n4. add the tool name to `AgentConfig.tools`\n5. add a permission entry in `AgentConfig.tool_permissions`\n\nIf the tool wraps an existing LangChain tool, `ToolAdapter.from_langchain_tool(...)` is the simplest path.\n\nIf the tool is custom, provide an async `executor` to `ToolAdapter(...)`.\n\n## Overriding tools or backend for nested agents\n\n`create_skill_agent(...)` supports:\n\n- `backend_override`\n- `tool_adapters_override`\n\nThese are useful when a nested agent should reuse the shared factory logic but run with a custom runtime surface.\n\n---\n\n## Error behavior\n\nTyped errors raised by this module include:\n\n- `AgentError`\n  - unknown backend type\n  - prompt path escape\n  - missing prompt template\n  - malformed agent response\n  - git path/repo failures\n- `AgentNotFoundError`\n  - missing agent role in `AgentRegistry`\n- `ToolNotFoundError`\n  - missing tool registration in `ToolRegistry`\n\nSome failures are intentionally returned as structured strings instead of exceptions:\n\n- `ToolAdapter.execute(...)` permission denials and execution exceptions\n- `ReadOnlyBackend.write/edit/upload` mutation attempts\n\nThis split is deliberate:\n\n- configuration/runtime assembly errors raise\n- bounded runtime operations often return structured failure payloads\n\n---\n\n## Typical lifecycle\n\n## Creating a built-in role agent\n\nA caller typically:\n\n1. resolves a role config from `AgentRegistry`\n2. calls `create_agent(...)` or `create_registered_agent(...)`\n3. receives a `CompiledStateGraph`\n4. invokes it through `invoke_agent(...)`\n5. 
reads output with `extract_text(...)` or `extract_structured(...)`\n\n## What happens for a planner\n\nFor `PLANNER_AGENT_CONFIG`:\n\n- prompt template: `planner.md`\n- backend: read-only filesystem\n- tool: `git` with read permission\n- middleware: removes orchestration tools (`task`, `write_todos`) and strips their built-in prompts\n\n## What happens for a reviewer\n\nFor `REVIEWER_AGENT_CONFIG`:\n\n- backend: read-only filesystem\n- declared tool: `git`\n- middleware: `allowed_tool_names=[]`\n\nThat means the runtime tool surface is effectively empty, even though the config declares `git`. This is intentional for prompt-only bounded roles.\n\n## What happens for a coder\n\nFor `CODER_AGENT_CONFIG`:\n\n- backend: shell\n- tool: `git`\n- no bounded-role middleware is applied because the backend is not `filesystem`\n\n---\n\n## Contribution notes\n\n## Prefer the factory seam\n\nIf you need to change how agents are assembled, `api/agents/factory.py` is the intended integration point. Avoid scattering backend/tool/prompt logic elsewhere.\n\n## Keep tool permissions non-escalating\n\nAny new tool integration should preserve the `ToolAdapter.with_permission(...)` pattern. Role configs should only narrow permissions, never widen them.\n\n## Be careful with prompt injection sources\n\n`compose_system_prompt(...)` and `format_repo_context(...)` are where repository-derived instructions enter the model prompt. Changes here affect policy boundaries.\n\n## Middleware changes affect model behavior, not just tool availability\n\n`DisableSubagentToolMiddleware` strips both tools and built-in orchestration prompt fragments. If you add new orchestration-related tools or prompt fragments upstream, this middleware may need to be updated to keep bounded roles truly bounded.\n\n## Preserve clone scoping\n\nBoth backends and tools are rooted in `clone_path`. 
New integrations should maintain that same task-clone scoping model.\n\n---\n\n## Module map\n\n- `api/agents/__init__.py`  \n  Re-export surface for agent runtime integration.\n\n- `api/agents/registry.py`  \n  Built-in role configs and `AgentRegistry`.\n\n- `api/agents/factory.py`  \n  Main assembly seam for prompts, backends, tools, middleware, and DeepAgent creation.\n\n- `api/agents/backends.py`  \n  Backend resolution and `ReadOnlyBackend`.\n\n- `api/agents/middleware.py`  \n  Runtime request filtering for bounded roles.\n\n- `api/agents/invoke.py`  \n  Standard nested-agent invocation and response extraction helpers.\n\n- `api/tools/__init__.py`  \n  Re-export surface for tool integration.\n\n- `api/tools/registry.py`  \n  Tool registry, registrations, and materialization helpers.\n\n- `api/tools/base.py`  \n  `ToolAdapter` permission and execution boundary.\n\n- `api/tools/git.py`  \n  Built-in read-only git inspection tool.\n\n---\n\n## Summary\n\nThis module is the runtime composition layer for DarkFactory agents. 
Its main responsibilities are:\n\n- map role configs to concrete DeepAgent runtimes\n- keep agent execution scoped to a task clone\n- enforce read-only and bounded-role restrictions\n- expose DarkFactory-owned tools through a stable adapter contract\n- provide a standard invocation and output extraction path\n\nIf you are changing agent capabilities, role defaults, tool availability, or nested-agent behavior, this is the module you will be working in.","application-bootstrap-and-api-surface":"# Application Bootstrap and API Surface\n\n# Application Bootstrap and API Surface\n\nThis module defines how the DarkFactory API process starts, how the FastAPI application is assembled, what happens during startup and shutdown, and the public HTTP endpoints exposed by the service.\n\nIt spans four files:\n\n- `api/__main__.py` — CLI entrypoint\n- `api/app.py` — FastAPI application factory and lifecycle management\n- `api/routes/__init__.py` — route exports\n- `api/routes/tasks.py` — task and artifact HTTP endpoints\n\n## Responsibilities\n\nAt a high level, this module is responsible for:\n\n- starting the API server from the command line\n- constructing the `FastAPI` app with all shared runtime state\n- validating environment and infrastructure before serving traffic\n- initializing registries and services used by request handlers\n- recovering in-flight work on startup\n- cleaning up stale clone directories\n- exposing the task-oriented HTTP API\n- translating domain exceptions into HTTP responses\n\n## Entry Point: `python -m api`\n\nThe CLI entrypoint lives in `api/__main__.py`.\n\n## `main()`\n\n```python\ndef main() -> None:\n```\n\n`main()` creates an `argparse` CLI with a single subcommand:\n\n- `serve` — runs the API server\n\nSupported options for `serve`:\n\n- `--host` (default: `127.0.0.1`)\n- `--port` (default: `8000`)\n\nWhen invoked, it starts Uvicorn with:\n\n```python\nuvicorn.run(\"api.app:create_app\", factory=True, host=args.host, 
port=args.port)\n```\n\nImportant details:\n\n- `factory=True` means Uvicorn calls `api.app:create_app` to obtain the ASGI app.\n- The app is not created at import time; it is built on demand.\n- This pattern is useful for tests and for startup validation that should happen during app creation/lifespan rather than module import.\n\nExample:\n\n```bash\npython -m api serve --host 0.0.0.0 --port 8000\n```\n\n## Application Factory\n\nThe core bootstrap logic is in `api/app.py`.\n\n## `create_app()`\n\n```python\ndef create_app(\n    settings: Settings | None = None,\n    *,\n    task_service_factory: TaskServiceFactory | None = None,\n) -> FastAPI:\n```\n\n`create_app()` builds and returns the `FastAPI` application.\n\n### Inputs\n\n- `settings`: optional `Settings`\n  - If omitted, `get_settings()` is used.\n- `task_service_factory`: optional override for constructing `TaskService`\n  - Signature: `Callable[[asyncpg.Pool, Settings], TaskService]`\n  - Primarily useful for tests or custom wiring\n\n### What `create_app()` does\n\n1. Resolves settings\n2. Publishes model API keys via `publish_model_api_keys(app_settings)`\n3. Configures logging with `_configure_logging(app_settings)`\n4. Defines the application lifespan handler\n5. Creates the `FastAPI` instance\n6. Builds and stores shared registries on `app.state`\n7. Registers routers\n8. Registers `/healthz`\n9. Registers exception handlers\n\n### Shared application state\n\n`create_app()` stores several runtime objects on `app.state`:\n\n- `app.state.settings`\n- `app.state.agent_registry`\n- `app.state.tool_registry`\n- `app.state.skill_catalog`\n- `app.state.db_pool` (during lifespan startup)\n- `app.state.task_service` (during lifespan startup)\n\nThese objects are the bridge between bootstrap and request handling. 
Route handlers do not construct services themselves; they retrieve `request.app.state.task_service`.\n\n### Registry and catalog initialization\n\nBefore serving requests, `create_app()` initializes:\n\n- `build_default_agent_registry()`\n- `build_default_tool_registry()`\n- `build_default_skill_catalog(app_settings.workspace_root)`\n\nThen it synchronizes skill/tool registrations:\n\n```python\nsync_skill_tool_registrations(tool_registry, skill_catalog)\n```\n\nThis means the app’s runtime capabilities are assembled centrally during bootstrap rather than lazily inside request handlers.\n\n## Lifecycle Management\n\nThe app uses a FastAPI lifespan context manager defined inside `create_app()`.\n\n### Startup sequence\n\nOn startup, the lifespan handler performs the following steps in order:\n\n1. `await _validate_startup(app_settings)`\n2. Ensure `clone_root` exists\n3. Create the database pool with `create_pool(app_settings.database_url)`\n4. Remove orphan clone directories with `_cleanup_orphan_clones(...)`\n5. Construct `TaskService`\n6. Recover running tasks with `recover_running_tasks()`\n7. Recover publication tasks with `recover_publication_tasks()`\n8. Log startup completion\n\n### Shutdown sequence\n\nOn shutdown, the lifespan handler:\n\n1. calls `await app.state.task_service.shutdown()`\n2. closes the database pool with `await close_pool(app.state.db_pool)`\n3. 
logs shutdown completion\n\nThis ordering matters: the service is shut down before the pool is closed, so any in-flight service cleanup can still use the database if needed.\n\n## Bootstrap Flow\n\n```mermaid\nflowchart TD\n    A[python -m api serve] --> B[uvicorn.run]\n    B --> C[create_app]\n    C --> D[configure settings/logging]\n    C --> E[build registries and skill catalog]\n    C --> F[FastAPI lifespan startup]\n    F --> G[validate startup]\n    F --> H[create db pool]\n    F --> I[cleanup orphan clones]\n    F --> J[create TaskService]\n    F --> K[recover tasks/publications]\n    C --> L[register routes and handlers]\n```\n\n## Startup Validation\n\nStartup validation is implemented by `_validate_startup()` and related helpers.\n\n## `_validate_startup()`\n\n```python\nasync def _validate_startup(settings: Settings) -> None:\n```\n\nThis function collects configuration and environment issues and raises a single `ConfigurationError` if anything is wrong.\n\n### Validation checks\n\n#### 1. Settings-level issues\n\n```python\nissues = settings.get_startup_issues()\n```\n\nThis delegates baseline validation to the `Settings` object.\n\n#### 2. Git CLI availability\n\n```python\ngit_executable = shutil.which(\"git\")\n```\n\nIf `git` is not installed or not on `PATH`, startup fails.\n\n#### 3. Target repository validity\n\nIf `git` is available, `_validate_startup()` calls:\n\n```python\nawait _get_target_repo_issues(settings.target_repo, git_executable)\n```\n\nThis validates that `TARGET_REPO` is usable by the system.\n\n#### 4. Database connectivity\n\nIt attempts a direct `asyncpg.connect(settings.database_url)`.\n\n- On failure, startup records a connectivity issue.\n- On success, the temporary connection is closed.\n\n#### 5. 
Database schema validation\n\nIf the database is connectable, it calls:\n\n```python\nawait validate_database_schema(settings.database_url)\n```\n\nAny `ConfigurationError` raised here is converted into a startup issue.\n\n### Failure behavior\n\nIf any issues were collected:\n\n```python\nraise ConfigurationError(\"Startup validation failed: \" + \"; \".join(issues))\n```\n\nThis causes application startup to fail before the server begins handling requests.\n\n### Why this matters\n\nThis design pushes operational failures to startup time instead of surfacing them later as partial runtime failures. Contributors should preserve this property when adding new infrastructure dependencies.\n\n## Target Repository Validation\n\n## `_get_target_repo_issues()`\n\n```python\nasync def _get_target_repo_issues(target_repo: Path, git_executable: str) -> list[str]:\n```\n\nThis helper validates `settings.target_repo`.\n\n### Behavior\n\n- If `target_repo` does not exist, it returns `[]`; the missing path is reported separately by `settings.get_startup_issues()`, which checks that `TARGET_REPO` exists\n- If it exists, it must be a valid Git repository\n- Accepted repository forms:\n  - a bare repository\n  - a checked-out repository whose `receive.denyCurrentBranch` is set to `updateInstead`\n\n### Validation logic\n\nThe function uses `_get_git_output()` to run Git commands against `target_repo`.\n\n#### Bare repository path\n\nIt checks:\n\n```bash\ngit rev-parse --is-bare-repository\ngit rev-parse --absolute-git-dir\n```\n\nA bare repo is accepted only if the resolved Git dir matches the resolved `target_repo` path.\n\n#### Non-bare repository path\n\nIt checks:\n\n```bash\ngit rev-parse --show-toplevel\ngit config --get receive.denyCurrentBranch\n```\n\nA checked-out repo is accepted only if:\n\n- `--show-toplevel` resolves exactly to `target_repo`\n- `receive.denyCurrentBranch == updateInstead`\n\nOtherwise startup fails with:\n\n```text\nTARGET_REPO must be bare or set receive.denyCurrentBranch=updateInstead: <path>\n```\n\n### Why the exact path checks exist\n\nThe code does not 
merely check “is this somewhere inside a Git repo?” It verifies that `target_repo` itself is the repository root or bare repo directory. This prevents accidentally pointing configuration at a nested subdirectory.\n\n## `_get_git_output()`\n\n```python\nasync def _get_git_output(\n    git_executable: str,\n    target_repo: Path,\n    *args: str,\n) -> str | None:\n```\n\nThis helper runs a Git subprocess with:\n\n- executable: `git_executable`\n- working directory: `target_repo`\n- stdout/stderr captured asynchronously\n\nIt returns:\n\n- stripped stdout on success\n- `None` if the command exits non-zero\n\nThis function is intentionally narrow: it is a utility for repository validation, not a general Git abstraction.\n\n## Logging Configuration\n\n## `_configure_logging()`\n\n```python\ndef _configure_logging(settings: Settings) -> None:\n```\n\nThis maps `settings.log_level` to the `LogLevel` enum:\n\n```python\nlog_level = LogLevel[settings.log_level]\n```\n\nIf the configured value is invalid, it raises:\n\n```python\nConfigurationError(f\"Unsupported log level: {settings.log_level}\")\n```\n\nOn success, it applies the level with:\n\n```python\nlogger.set_level(log_level)\n```\n\nBecause this happens during app creation, invalid logging configuration prevents the app from starting.\n\n## Clone Directory Cleanup\n\n## `_cleanup_orphan_clones()`\n\n```python\nasync def _cleanup_orphan_clones(\n    db_pool: asyncpg.Pool,\n    settings: Settings,\n) -> list[Path]:\n```\n\nDarkFactory maintains clone directories for task execution. This helper removes clone directories that no longer correspond to active or recently failed tasks.\n\n### How it works\n\n1. Creates a `GitClient`\n2. Loads currently running task IDs via `list_running_task_ids(db_pool)`\n3. Loads recently failed task IDs via `list_failed_task_ids_updated_since(...)`\n4. 
Calls `git_client.cleanup_orphan_clones(...)`\n\nThe retention window for failed tasks is based on:\n\n```python\nFAILED_CLONE_RETENTION\n```\n\nfrom `lib.git.client`.\n\n### Retention behavior\n\nClone directories are preserved for:\n\n- tasks currently running\n- tasks that failed recently enough to still be within the retention window\n\nEverything else under `settings.clone_root` is eligible for cleanup.\n\n### Startup integration\n\nThe lifespan startup stores the returned paths and logs them if any were removed:\n\n```python\nlogger.info(\n    \"Removed orphan clone directories: \"\n    + \", \".join(str(path) for path in removed_clone_paths)\n)\n```\n\nThis cleanup happens before task recovery, which keeps the clone workspace aligned with persisted task state.\n\n## Task Service Wiring\n\nThe application delegates almost all task-related behavior to `TaskService`.\n\n### Default construction\n\nIf no custom factory is provided, startup creates the service with:\n\n```python\ncreate_task_service(\n    app.state.db_pool,\n    app_settings,\n    agent_registry=app.state.agent_registry,\n    tool_registry=app.state.tool_registry,\n    skill_catalog=app.state.skill_catalog,\n)\n```\n\nThis is the main integration point between bootstrap and the rest of the task execution system.\n\n### Custom construction\n\nIf `task_service_factory` is provided, startup uses:\n\n```python\ntask_service_factory(app.state.db_pool, app_settings)\n```\n\nNote that the custom factory receives only the pool and settings. 
If it needs registries or catalogs, it must obtain or reconstruct them itself.\n\n### Recovery hooks\n\nImmediately after construction, startup invokes:\n\n- `await app.state.task_service.recover_running_tasks()`\n- `await app.state.task_service.recover_publication_tasks()`\n\nThese calls are essential for crash recovery and restart continuity.\n\n## HTTP API Surface\n\nThe route package exports `tasks_router` from `api/routes/__init__.py`:\n\n```python\nfrom api.routes.tasks import router as tasks_router\n```\n\n`create_app()` includes it with:\n\n```python\napp.include_router(tasks_router)\n```\n\nAll task routes are defined in `api/routes/tasks.py`.\n\n## Router configuration\n\n```python\nrouter = APIRouter(prefix=\"/api/v1\", tags=[\"tasks\"])\n```\n\nAll endpoints below are rooted at `/api/v1`.\n\n## Route-to-service pattern\n\nEvery route follows the same pattern:\n\n1. retrieve `TaskService` from `request.app.state`\n2. call a service method\n3. convert the returned domain model into the declared response model\n\nThe helper is:\n\n```python\ndef _get_task_service(request: Request) -> TaskService:\n    return request.app.state.task_service\n```\n\nThis keeps route handlers thin and makes `TaskService` the real application boundary for task behavior.\n\n## Endpoints\n\n## `POST /api/v1/task`\n\nHandler:\n\n```python\nasync def create_task_route(request: Request, payload: TaskRequest) -> TaskResponse:\n```\n\nBehavior:\n\n- gets the task service\n- calls `await task_service.create_task(payload)`\n- returns `TaskResponse.model_validate(task.model_dump())`\n\nResponse:\n\n- status `202 Accepted`\n- body type `TaskResponse`\n\nUse this endpoint to submit a new task.\n\n## `GET /api/v1/task/{task_id}`\n\nHandler:\n\n```python\nasync def get_task_route(request: Request, task_id: str) -> TaskResponse:\n```\n\nBehavior:\n\n- calls `await task_service.get_task_response(task_id)`\n\nResponse:\n\n- status `200 OK`\n- body type `TaskResponse`\n\nThis is the primary task 
status/read endpoint.\n\nAlthough the route itself is simple, `get_task_response()` can trigger deeper task-plan resolution and Git reads in the service layer, so the underlying read path is not necessarily trivial.\n\n## `POST /api/v1/task/{task_id}/follow-up`\n\nHandler:\n\n```python\nasync def create_task_follow_up_route(\n    request: Request,\n    task_id: str,\n    payload: TaskFollowUpRequest,\n) -> TaskResponse:\n```\n\nBehavior:\n\n- calls `await task_service.create_blocked_pr_follow_up(...)`\n\nArguments passed to the service:\n\n- `task_id=task_id`\n- `description=payload.description`\n\nResponse:\n\n- status `202 Accepted`\n- body type `TaskResponse`\n\nThis endpoint creates a follow-up task for an existing blocked PR workflow.\n\n## `POST /api/v1/task/{task_id}/publication/reconcile`\n\nHandler:\n\n```python\nasync def reconcile_task_publication_route(request: Request, task_id: str) -> TaskResponse:\n```\n\nBehavior:\n\n- calls `await task_service.reconcile_publication(task_id=task_id)`\n\nResponse:\n\n- status `200 OK`\n- body type `TaskResponse`\n\nThis endpoint is used to reconcile publication state, especially after partial or interrupted publication workflows.\n\n## `GET /api/v1/task/{task_id}/debug`\n\nHandler:\n\n```python\nasync def get_task_debug_route(request: Request, task_id: str) -> TaskDebugResponse:\n```\n\nBehavior:\n\n- calls `await task_service.get_task_debug_response(task_id)`\n\nResponse:\n\n- status `200 OK`\n- body type `TaskDebugResponse`\n\nThis is a debugging/inspection endpoint for task internals such as checkpoint or clone-related state, as exposed by the service layer.\n\n## `GET /api/v1/artifacts/{artifact_id}`\n\nHandler:\n\n```python\nasync def get_artifact_route(request: Request, artifact_id: str) -> ArtifactResponse:\n```\n\nBehavior:\n\n- calls `await task_service.get_artifact(artifact_id)`\n- converts the result with `ArtifactResponse.model_validate(artifact.model_dump())`\n\nResponse:\n\n- 
status `200 OK`\n- body type `ArtifactResponse`\n\nUse this endpoint to fetch persisted artifacts associated with tasks.\n\n## `GET /healthz`\n\nDefined directly in `create_app()`:\n\n```python\n@app.get(\"/healthz\", tags=[\"system\"])\nasync def healthcheck() -> dict[str, str]:\n    return {\"status\": \"ok\"}\n```\n\nThis is a lightweight liveness endpoint. It does not perform deep dependency checks.\n\n## Exception Handling\n\n`create_app()` registers exception handlers that translate domain and integration errors into HTTP responses.\n\n## Registered handlers\n\n### `TaskNotFoundError`\n\n```python\n@app.exception_handler(TaskNotFoundError)\n```\n\nResponse:\n\n- `404`\n- `{\"detail\": str(exc)}`\n\n### `ArtifactNotFoundError`\n\n```python\n@app.exception_handler(ArtifactNotFoundError)\n```\n\nResponse:\n\n- `404`\n- `{\"detail\": str(exc)}`\n\n### `WorkerBusyError`\n\n```python\n@app.exception_handler(WorkerBusyError)\n```\n\nResponse:\n\n- `503`\n- `{\"error\": \"worker_busy\", \"detail\": str(exc)}`\n\nThis is the only handler here that includes a structured error code field.\n\n### `InvalidTaskRequestError`\n\n```python\n@app.exception_handler(InvalidTaskRequestError)\n```\n\nResponse:\n\n- `400`\n- `{\"detail\": str(exc)}`\n\n### `GitHubAPIError`\n\n```python\n@app.exception_handler(GitHubAPIError)\n```\n\nResponse:\n\n- `400`\n- `{\"detail\": str(exc)}`\n\n## Error handling boundaries\n\nThese handlers are intentionally focused on known domain/integration exceptions. They do not replace FastAPI’s default handling for validation errors or unexpected exceptions.\n\nWhen adding new service-layer exceptions that should map to stable API responses, register them here rather than embedding HTTP concerns inside `TaskService`.\n\n## Design Patterns in This Module\n\n## 1. 
Application factory pattern\n\nUsing `create_app()` instead of a global app instance makes the module:\n\n- testable\n- configurable\n- compatible with Uvicorn factory loading\n- safe for startup-time dependency validation\n\n## 2. Thin routes, thick service layer\n\nThe route handlers in `api/routes/tasks.py` are intentionally minimal. They:\n\n- parse request inputs\n- delegate to `TaskService`\n- serialize outputs\n\nBusiness logic belongs in the service layer, not in the route module.\n\n## 3. Shared runtime state via `app.state`\n\nBootstrap-owned objects are attached to `app.state` and reused by handlers. This avoids repeated construction and keeps dependency wiring centralized.\n\n## 4. Fail-fast startup\n\n`_validate_startup()` ensures the process refuses to serve traffic when core dependencies are misconfigured.\n\n## 5. Recovery-first lifecycle\n\nStartup does more than initialize dependencies; it also restores interrupted work:\n\n- running tasks\n- publication tasks\n- clone workspace consistency\n\nThis is a key part of the application’s operational model.\n\n## How This Module Connects to the Rest of the Codebase\n\n## Configuration\n\nFrom `lib.config`:\n\n- `Settings`\n- `get_settings()`\n- `publish_model_api_keys()`\n\nThis module depends on configuration both for infrastructure (`database_url`, `clone_root`, `target_repo`) and runtime behavior (`workspace_root`, `log_level`, app name).\n\n## Database layer\n\nFrom `api.repositories.database`:\n\n- `create_pool()`\n- `close_pool()`\n- `validate_database_schema()`\n\nFrom `api.repositories.tasks`:\n\n- `list_running_task_ids()`\n- `list_failed_task_ids_updated_since()`\n\nBootstrap uses the database both for validation and for reconstructing runtime state.\n\n## Task service layer\n\nFrom `api.services`:\n\n- `TaskService`\n- `create_task_service()`\n\nFrom `api.services.tasks` indirectly through route calls:\n\n- `create_task()`\n- `get_task_response()`\n- `create_blocked_pr_follow_up()`\n- 
`reconcile_publication()`\n- `get_task_debug_response()`\n- `get_artifact()`\n- `shutdown()`\n- recovery methods\n\nThis module is the primary HTTP-to-service integration point.\n\n## Agent, tool, and skill systems\n\nFrom:\n\n- `api.agents.registry`\n- `api.tools.registry`\n- `api.skills.discovery`\n- `api.skills.bridge`\n\nBootstrap assembles these systems before constructing `TaskService`, so the service starts with a complete execution environment.\n\n## Git integration\n\nFrom `lib.git` / `lib.git.client`:\n\n- `GitClient`\n- `FAILED_CLONE_RETENTION`\n\nBootstrap uses Git in two distinct ways:\n\n- direct subprocess validation of `TARGET_REPO`\n- clone workspace cleanup through `GitClient`\n\n## GitHub integration\n\nFrom `lib.github.errors`:\n\n- `GitHubAPIError`\n\nThis module does not perform GitHub operations directly, but it exposes GitHub-related failures from deeper layers as HTTP `400` responses.\n\n## Contributing Notes\n\n## Adding a new endpoint\n\nIf you add a new API endpoint:\n\n1. define it in `api/routes/tasks.py` or another route module\n2. keep the handler thin\n3. retrieve dependencies from `request.app.state`\n4. delegate business logic to a service\n5. declare explicit request/response models\n6. 
register the router in `create_app()` if it is a new router\n\n## Adding new startup dependencies\n\nIf a new subsystem is required for correct operation:\n\n- validate it in `_validate_startup()`\n- prefer collecting issues and raising one `ConfigurationError`\n- initialize it during lifespan startup, not lazily in request handlers\n- shut it down during lifespan teardown\n\n## Adding new app state\n\nIf you attach new objects to `app.state`:\n\n- initialize them centrally in `create_app()` or lifespan startup\n- keep naming consistent and explicit\n- document whether they are available before or only after startup\n\n## Extending exception mapping\n\nIf a service-layer exception should become a stable API contract:\n\n- add an exception handler in `create_app()`\n- choose status code and payload shape deliberately\n- keep domain exceptions free of HTTP-specific logic\n\n## Testing implications\n\nThis module is heavily exercised through tests that call `create_app()` directly. That is a strong signal that:\n\n- `create_app()` should remain deterministic\n- dependency injection via `settings` and `task_service_factory` is important\n- startup behavior is part of the module’s public contract\n\nBe cautious when changing:\n\n- startup validation rules\n- `app.state` contents\n- lifespan ordering\n- route paths or response models\n- exception-to-status-code mappings\n\n## Summary\n\nThis module is the composition root for the API process.\n\n- `api/__main__.py` starts Uvicorn\n- `create_app()` assembles the FastAPI app\n- lifespan startup validates configuration, opens infrastructure, cleans clone state, and recovers interrupted work\n- `api/routes/tasks.py` exposes the task and artifact HTTP surface\n- exception handlers convert domain failures into stable HTTP responses\n\nIf you need to understand how a request enters the system, or where shared runtime dependencies are wired together, this is the module to start 
with.","configuration-and-logging-infrastructure":"# Configuration and Logging Infrastructure\n\n# Configuration and Logging Infrastructure\n\nThis module provides two foundational services used across the application:\n\n- `lib.config`: runtime configuration loading, validation, and startup checks\n- `lib.logger`: shared structured-ish logging with contextual metadata and optional file output\n\nTogether, they define how the process is configured at startup and how operational events are emitted during execution.\n\n---\n\n## Module Overview\n\n### `lib.config`\n`lib.config` centralizes environment-driven settings in the `Settings` class, built on `pydantic_settings.BaseSettings`. It is responsible for:\n\n- loading values from environment variables and `.env`\n- normalizing and validating configuration\n- checking filesystem assumptions needed at startup\n- deriving GitHub push URLs\n- publishing validated model API keys back into `os.environ` for SDKs that expect them there\n\n### `lib.logger`\n`lib.logger` wraps the standard library `logging` module in a singleton `Logger` class. 
It adds:\n\n- a custom `VERBOSE` level\n- console logging and optional file logging\n- per-context metadata via `ContextVar`\n- convenience methods for JSON payload logging\n- a module-level singleton instance: `logger`\n\nThis logger is used by higher-level services and API routes, including GitHub publication and task recovery flows.\n\n---\n\n## Configuration (`lib.config`)\n\n## `Settings`\n\n`Settings` is the main configuration object:\n\n```python\nclass Settings(BaseSettings):\n```\n\nIt is instantiated from environment variables, with aliases for externally-facing names like `DATABASE_URL`, `TARGET_REPO`, and `OPENAI_API_KEY`.\n\n### Settings source behavior\n\n`Settings.model_config` is defined as:\n\n```python\nSettingsConfigDict(\n    env_file=\".env\",\n    env_file_encoding=\"utf-8\",\n    case_sensitive=False,\n    extra=\"ignore\",\n)\n```\n\nThis means:\n\n- `.env` is loaded automatically if present\n- environment variable names are case-insensitive\n- unknown environment variables are ignored rather than rejected\n\n### Defined fields\n\nKey fields include:\n\n- `app_name: str = \"DarkFactory\"`\n- `environment: str = \"development\"`\n- `log_level: str = \"INFO\"`\n- `database_url: str = Field(alias=\"DATABASE_URL\")`\n- `target_repo: Path = Field(alias=\"TARGET_REPO\")`\n- `anthropic_api_key: str | None = Field(default=None, alias=\"ANTHROPIC_API_KEY\")`\n- `openai_api_key: str | None = Field(default=None, alias=\"OPENAI_API_KEY\")`\n- `darkfactory_self_modify: bool = Field(default=False, alias=\"DARKFACTORY_SELF_MODIFY\")`\n- `workspace_root: Path = Field(default=Path(\"workspace\"), alias=\"WORKSPACE_ROOT\")`\n- `clone_root: Path = Field(default=Path(\"/work/clones\"), alias=\"CLONE_ROOT\")`\n- GitHub-related settings:\n  - `gh_token`\n  - `gh_repo`\n  - `gh_base_url`\n  - `gh_git_remote_url`\n\n### Validation rules\n\nThe class uses both field-level and model-level validators.\n\n#### 
`validate_database_url()`\n\n```python\n@field_validator(\"database_url\")\ndef validate_database_url(cls, value: str) -> str:\n```\n\nRejects empty or whitespace-only `DATABASE_URL`.\n\n#### `expand_paths()`\n\n```python\n@field_validator(\"target_repo\", \"workspace_root\", \"clone_root\", mode=\"before\")\ndef expand_paths(cls, value: str | Path) -> Path:\n```\n\nExpands `~` and converts configured path values into `Path` objects before normal validation.\n\nThis applies to:\n\n- `target_repo`\n- `workspace_root`\n- `clone_root`\n\n#### `normalize_log_level()`\n\n```python\n@field_validator(\"log_level\")\ndef normalize_log_level(cls, value: str) -> str:\n```\n\nNormalizes `LOG_LEVEL` to uppercase and restricts it to:\n\n- `VERBOSE`\n- `DEBUG`\n- `INFO`\n- `WARNING`\n- `ERROR`\n- `CRITICAL`\n\nThis validator is important because `lib.logger.LogLevel` supports the same set, including the custom `VERBOSE` level.\n\n#### `validate_github_settings()`\n\n```python\n@model_validator(mode=\"after\")\ndef validate_github_settings(self) -> Settings:\n```\n\nEnforces consistency among GitHub-related settings:\n\n- `GH_REPO` requires `GH_TOKEN`\n- `GH_GIT_REMOTE_URL` requires `GH_REPO`\n- if `GH_REPO` is set and `GH_BASE_URL` is customized away from `https://api.github.com`, then `GH_GIT_REMOTE_URL` must also be set\n\nThat last rule prevents the code from guessing a Git remote URL when the GitHub API endpoint is not the public default.\n\n#### `validate_model_api_keys()`\n\n```python\n@model_validator(mode=\"after\")\ndef validate_model_api_keys(self) -> Settings:\n```\n\nRequires at least one model provider key:\n\n- `ANTHROPIC_API_KEY`, or\n- `OPENAI_API_KEY`\n\nWithout one of these, settings construction fails.\n\nThis is a startup gate: the application assumes at least one model backend is available.\n\n---\n\n## Startup checks\n\n### `get_startup_issues()`\n\n```python\ndef get_startup_issues(self) -> list[str]:\n```\n\nThis method performs non-fatal environment 
checks and returns a list of human-readable issues instead of raising immediately.\n\nIt checks:\n\n1. `WORKSPACE_ROOT` exists\n2. required workspace subdirectories exist:\n   - `workspace_root / \"prompts\"`\n   - `workspace_root / \"skills\"`\n   - `workspace_root / \"memory\"`\n3. `TARGET_REPO` exists\n4. `CLONE_ROOT` does not overlap `WORKSPACE_ROOT`\n5. `CLONE_ROOT` does not overlap `TARGET_REPO`\n\nOverlap detection is delegated to `_paths_overlap()`.\n\nThis split between validation and startup issues is intentional:\n\n- validators reject invalid configuration values and incompatible combinations\n- `get_startup_issues()` reports operational/environment problems in the filesystem\n\nThat makes it easier for startup code to decide whether to fail fast, warn, or surface issues through an API.\n\n### `_paths_overlap()`\n\n```python\ndef _paths_overlap(first: Path, second: Path) -> bool:\n```\n\nResolves both paths and returns `True` if:\n\n- they are the same path\n- one is a parent of the other\n\nThis protects clone operations from targeting the workspace or repository tree directly.\n\n---\n\n## Derived GitHub behavior\n\n### `get_github_push_url()`\n\n```python\ndef get_github_push_url(self) -> str | None:\n```\n\nReturns the Git remote URL to use for pushes.\n\nBehavior:\n\n- if `gh_repo` is `None`, returns `None`\n- if `gh_git_remote_url` is set, returns it directly\n- if `gh_base_url` is the default public GitHub API URL, derives:\n  - `https://github.com/{gh_repo}.git`\n- otherwise returns `None`\n\nThis method is conservative by design. 
It only derives a remote URL automatically for the standard public GitHub case.\n\n---\n\n## Settings lifecycle and caching\n\n### `get_settings()`\n\n```python\n@lru_cache(maxsize=1)\ndef get_settings() -> Settings:\n```\n\nReturns a singleton-like cached `Settings` instance.\n\nImportant properties:\n\n- the first call constructs `Settings()`\n- subsequent calls return the same object\n- changes to environment variables after the first call will not be reflected unless the cache is cleared\n\nThis pattern is useful for application startup and dependency injection, but contributors should remember the cache when writing tests or dynamic configuration code.\n\n### Typical usage\n\n```python\nfrom lib.config import get_settings\n\nsettings = get_settings()\n```\n\n### Testing implications\n\nBecause `get_settings()` is cached, tests that mutate environment variables usually need to avoid stale settings. Common approaches are:\n\n- instantiate `Settings()` directly in tests\n- clear the cache before calling `get_settings()` again\n\nThe test suite already constructs `Settings` directly in many places, which avoids cache coupling.\n\n---\n\n## Publishing API keys to SDKs\n\n### `publish_model_api_keys()`\n\n```python\ndef publish_model_api_keys(settings: Settings) -> None:\n```\n\nSome SDKs read credentials directly from process environment variables rather than from an injected config object. 
This helper bridges validated settings back into `os.environ`.\n\nBehavior:\n\n- if `settings.anthropic_api_key` is set, calls:\n  - `os.environ.setdefault(\"ANTHROPIC_API_KEY\", settings.anthropic_api_key)`\n- if `settings.openai_api_key` is set, calls:\n  - `os.environ.setdefault(\"OPENAI_API_KEY\", settings.openai_api_key)`\n\n`setdefault()` is significant:\n\n- existing environment values are preserved\n- validated settings only fill in missing process env entries\n\nThis avoids unexpectedly overwriting credentials that were already exported by the runtime environment.\n\n---\n\n## Logging (`lib.logger`)\n\n## Custom log level\n\nAt module import time, `lib.logger` defines:\n\n```python\nVERBOSE_LEVEL = 5\nlogging.addLevelName(VERBOSE_LEVEL, \"VERBOSE\")\n```\n\nThis creates a level below `DEBUG`, useful for very chatty diagnostics.\n\n### `LogLevel`\n\n```python\nclass LogLevel(Enum):\n```\n\nMaps supported names to numeric logging levels:\n\n- `VERBOSE = 5`\n- `DEBUG = logging.DEBUG`\n- `INFO = logging.INFO`\n- `WARNING = logging.WARNING`\n- `ERROR = logging.ERROR`\n- `CRITICAL = logging.CRITICAL`\n\nThis enum is the intended input to `Logger.set_level()`.\n\n---\n\n## `Logger` singleton\n\n### Construction model\n\n```python\nclass Logger:\n```\n\n`Logger` is implemented as a singleton using both `__new__()` and an `_initialized` guard in `__init__()`.\n\n#### `__new__()`\n\n```python\ndef __new__(cls, *args: object, **kwargs: object) -> Logger:\n```\n\nEnsures only one instance is ever allocated.\n\n#### `__init__()`\n\n```python\ndef __init__(self, name: str = \"darkfactory\", write_to_file: bool = True) -> None:\n```\n\nInitializes the singleton only once. 
Later constructor calls return the same instance and skip reconfiguration.\n\nThis has an important consequence:\n\n- constructor arguments only matter on the first initialization\n- later calls like `Logger(name=\"other\")` do not create a differently configured logger\n\nThe test suite explicitly checks this singleton behavior.\n\n### Initialization behavior\n\nOn first initialization, `Logger.__init__()`:\n\n1. reconfigures `sys.stdout` and `sys.stderr` to UTF-8 on Windows\n2. creates or retrieves a standard library logger with `logging.getLogger(name)`\n3. disables propagation with:\n   ```python\n   self.logger.propagate = False\n   ```\n4. sets the underlying logger level to `logging.DEBUG` initially\n5. creates a `ContextVar` for contextual metadata\n6. installs a console `StreamHandler` to `sys.stdout`\n7. optionally installs a file `FileHandler`\n8. calls `self.set_level(LogLevel.INFO)`\n\n### Handlers and formatting\n\nBoth console and file handlers use the same formatter:\n\n```python\n\"%(asctime)s | %(levelname)-8s | %(message)s\"\n```\n\nwith date format:\n\n```python\n\"%Y-%m-%d %H:%M:%S\"\n```\n\nExample output:\n\n```text\n2026-04-12 10:15:30 | INFO     | Publishing task | Context: task_id=123 repo=org/project\n```\n\n### File logging\n\nIf `write_to_file=True`, logs are written under:\n\n```python\nPath(__file__).resolve().parent.parent / \"logs\"\n```\n\nThe file name format is:\n\n```text\ndarkfactory_YYYYMMDD_HHMMSS.log\n```\n\nThe directory is created automatically.\n\n### Test-aware default logger instance\n\nAt module scope:\n\n```python\nlogger = Logger(write_to_file=os.getenv(\"PYTEST_CURRENT_TEST\") is None)\n```\n\nThis means:\n\n- in normal runtime, file logging is enabled\n- under pytest, file logging is disabled automatically if `PYTEST_CURRENT_TEST` is present\n\nThat keeps tests from creating log files by default.\n\n---\n\n## Context-aware logging\n\nOne of the most useful features in this module is contextual logging via 
`ContextVar`.\n\n### Internal context storage\n\n`Logger` stores context in:\n\n```python\nself._context: ContextVar[dict[str, Any]]\n```\n\nBecause it uses `ContextVar`, context is scoped to the current execution context rather than being a single mutable global dictionary. This is safer for concurrent or nested flows than a plain instance attribute.\n\n### `add_context()`\n\n```python\ndef add_context(self, **kwargs: object) -> None:\n```\n\nMerges new key/value pairs into the current context.\n\nExample:\n\n```python\nlogger.add_context(task_id=42, repo=\"org/project\")\nlogger.info(\"Starting publication\")\n```\n\n### `clear_context()`\n\n```python\ndef clear_context(self) -> None:\n```\n\nRemoves all current context.\n\n### `scoped_context()`\n\n```python\n@contextmanager\ndef scoped_context(self, **kwargs: object):\n```\n\nTemporarily extends the current context for the duration of a `with` block, then restores the previous context automatically.\n\nExample:\n\n```python\nwith logger.scoped_context(task_id=42, branch=\"feature/x\"):\n    logger.info(\"Pushing branch\")\n```\n\nThis pattern is used in cross-module flows such as task publication and recovery, where nested operations need consistent metadata attached to all emitted log lines.\n\n### `_format_message()`\n\n```python\ndef _format_message(self, message: str) -> str:\n```\n\nThis is the central formatting hook used by all log-level methods.\n\nIt performs two tasks:\n\n1. On Windows, strips characters outside the Basic Multilingual Plane:\n   ```python\n   message = \"\".join(character for character in message if ord(character) < 0x10000)\n   ```\n   This avoids encoding/display issues on Windows terminals.\n\n2. 
Appends context if present:\n   ```text\n   {message} | Context: key=value | other=value\n   ```\n\nAll public logging methods route through `_format_message()` before delegating to the underlying `logging.Logger`.\n\n---\n\n## Logging methods\n\n### Plain message methods\n\n`Logger` exposes standard severity methods:\n\n- `verbose(message: str) -> None`\n- `debug(message: str) -> None`\n- `info(message: str) -> None`\n- `warning(message: str) -> None`\n- `error(message: str) -> None`\n- `critical(message: str) -> None`\n- `exception(message: str) -> None`\n\nEach method formats the message with `_format_message()` and then calls the corresponding standard-library logger method.\n\n`exception()` is intended to be called inside an exception handler so traceback information is included by the underlying logging framework.\n\n### JSON helpers\n\nFor structured payload inspection, `Logger` provides:\n\n- `debug_json(message: str, payload: Any, indent: int = 2, **kwargs: Any) -> None`\n- `info_json(message: str, payload: Any, indent: int = 2, **kwargs: Any) -> None`\n- `warning_json(message: str, payload: Any, indent: int = 2, **kwargs: Any) -> None`\n\nThese methods call `_serialize_json()` and then log the resulting string at the appropriate level.\n\n#### `_serialize_json()`\n\n```python\ndef _serialize_json(self, message: str, payload: Any, indent: int = 2, **kwargs: Any) -> str:\n```\n\nBehavior:\n\n- attempts `json.dumps(payload, indent=indent, default=str, **kwargs)`\n- if serialization fails with `TypeError`, returns a fallback message containing the exception and raw payload\n- otherwise returns:\n  ```text\n  {message}\n  {serialized_json}\n  ```\n\nUsing `default=str` makes these helpers tolerant of many non-JSON-native objects.\n\n---\n\n## How configuration and logging fit together\n\nThese modules are loosely coupled rather than directly integrated.\n\n- `Settings.log_level` validates the configured level name\n- `Logger.set_level()` accepts a `LogLevel` 
enum value\n- startup code elsewhere is expected to translate from `settings.log_level` to `LogLevel[...]` and apply it\n\nSimilarly:\n\n- `Settings` validates model API keys\n- `publish_model_api_keys(settings)` exposes them to SDKs\n- higher-level startup code decides when to call that helper\n\nThis separation keeps both modules focused:\n\n- `lib.config` knows what configuration is valid\n- `lib.logger` knows how to emit logs\n- application bootstrap code wires them together\n\n---\n\n## Execution flow in the wider codebase\n\nThe logger is not just a utility; it participates in important operational flows.\n\nIn publication and recovery paths, higher-level services use `logger.scoped_context(...)` to attach metadata such as task identifiers, branch names, or repository information. Later calls to `logger.info(...)` automatically include that context because all severity methods pass through `_format_message()`.\n\nA simplified flow looks like this:\n\n```mermaid\nflowchart TD\n    A[API route/service] --> B[publish_task_to_github]\n    B --> C[logger.scoped_context]\n    B --> D[logger.info]\n    D --> E[_format_message]\n    E --> F[console/file handlers]\n```\n\nThis pattern shows up in task publication and reconciliation flows, where contextual logging helps correlate nested operations without manually repeating identifiers in every message.\n\n---\n\n## Common usage patterns\n\n## Loading settings\n\n```python\nfrom lib.config import get_settings\n\nsettings = get_settings()\nissues = settings.get_startup_issues()\nif issues:\n    for issue in issues:\n        print(issue)\n```\n\n## Publishing model credentials for SDKs\n\n```python\nfrom lib.config import get_settings, publish_model_api_keys\n\nsettings = get_settings()\npublish_model_api_keys(settings)\n```\n\n## Applying configured log level\n\n```python\nfrom lib.config import get_settings\nfrom lib.logger import LogLevel, logger\n\nsettings = 
get_settings()\nlogger.set_level(LogLevel[settings.log_level])\n```\n\n## Contextual logging\n\n```python\nfrom lib.logger import logger\n\nwith logger.scoped_context(task_id=123, repo=\"org/project\"):\n    logger.info(\"Publishing task\")\n    logger.debug_json(\"Publication payload\", {\"branch\": \"feature/test\"})\n```\n\n---\n\n## Design notes and contributor guidance\n\n## Why `Settings` uses validators heavily\n\nThe validators in `Settings` are doing more than type conversion:\n\n- they encode deployment assumptions\n- they prevent ambiguous GitHub configuration\n- they ensure at least one model backend is available\n- they normalize values early so downstream code can stay simpler\n\nIf you add a new setting, decide whether it belongs in:\n\n- a field validator: for normalization or single-field constraints\n- a model validator: for cross-field consistency\n- `get_startup_issues()`: for filesystem or environment checks that may be reported rather than raised\n\nA good rule of thumb:\n\n- raise during model construction for invalid configuration\n- return startup issues for missing directories or unsafe path layouts\n\n## Why logger context uses `ContextVar`\n\n`ContextVar` allows nested operations to add metadata without permanently mutating global state. 
This is especially useful in service-layer code where one request or task may trigger multiple lower-level operations.\n\nPrefer:\n\n```python\nwith logger.scoped_context(task_id=task.id):\n    ...\n```\n\nover manually calling `add_context()` and `clear_context()` unless you truly need long-lived context.\n\n## Singleton caveats\n\nBoth `get_settings()` and `Logger` are singleton-like.\n\nThat simplifies application-wide access, but it also means:\n\n- initialization order matters\n- tests must account for cached/shared state\n- constructor arguments to `Logger` only affect first initialization\n\nWhen changing either module, be careful not to introduce hidden state that makes tests or startup behavior brittle.\n\n---\n\n## Reference\n\n## `lib.config`\n\n- `class Settings(BaseSettings)`\n- `Settings.validate_database_url()`\n- `Settings.expand_paths()`\n- `Settings.validate_github_settings()`\n- `Settings.validate_model_api_keys()`\n- `Settings.normalize_log_level()`\n- `Settings.get_startup_issues()`\n- `Settings.get_github_push_url()`\n- `get_settings()`\n- `publish_model_api_keys(settings: Settings)`\n- `_paths_overlap(first: Path, second: Path)`\n\n## `lib.logger`\n\n- `VERBOSE_LEVEL`\n- `class LogLevel(Enum)`\n- `class Logger`\n  - `Logger.__new__()`\n  - `Logger.__init__()`\n  - `Logger.set_level()`\n  - `Logger.add_context()`\n  - `Logger.clear_context()`\n  - `Logger.scoped_context()`\n  - `Logger.verbose()`\n  - `Logger.debug()`\n  - `Logger.info()`\n  - `Logger.warning()`\n  - `Logger.error()`\n  - `Logger.critical()`\n  - `Logger.exception()`\n  - `Logger.debug_json()`\n  - `Logger.info_json()`\n  - `Logger.warning_json()`\n  - `Logger._format_message()`\n  - `Logger._serialize_json()`\n- `logger`","domain-models-and-error-contracts":"# Domain Models and Error Contracts\n\n# Domain Models and Error Contracts\n\nThis module defines the core data contracts and domain-level exceptions used across the API and service layers.\n\nIt has two 
responsibilities:\n\n- provide validated, typed models for tasks, artifacts, agents, reviews, repository context, and verification\n- provide stable exception types that communicate domain failures without leaking lower-level implementation details\n\nMost of the models are implemented with Pydantic `BaseModel`, which means they serve as both validation boundaries and serialization contracts.\n\n## Module layout\n\n- `api/errors.py` — domain exceptions\n- `api/models/__init__.py` — package export surface\n- `api/models/agents.py` — agent/tool configuration contracts\n- `api/models/artifacts.py` — artifact persistence and response models\n- `api/models/review.py` — structured review outcomes\n- `api/models/tasks.py` — task persistence, request/response, and graph state models\n- `api/models/verification.py` — verification configuration and results\n- `lib/errors.py` — shared base exception type\n\n## Design goals\n\n### Strong validation at boundaries\n\nThese models reject malformed input early using:\n\n- `Literal[...]` type aliases for enum-like fields\n- `Field(...)` constraints such as `ge=1`, `min_length=1`\n- `field_validator(...)` for field normalization and validation\n- `model_validator(...)` for cross-field invariants\n\n### Separation between persistence, requests, responses, and runtime state\n\nThe task and artifact models are intentionally split by use case:\n\n- `*Create` models for inserts\n- `*Update` models for partial updates\n- `*Record` models for durable repository rows\n- `*Response` models for HTTP responses\n- runtime state models like `PlanningState` and `ImplementationState` for graph execution\n\n### Domain-specific exceptions\n\nErrors in `api/errors.py` are semantic contracts. 
They let services and routes distinguish failures such as:\n\n- missing tasks or artifacts\n- invalid lifecycle transitions\n- execution scope violations\n- configuration and parsing failures\n- registry lookup failures for agents, tools, and skills\n\n## Error contracts\n\n## Base exception: `DarkFactoryError`\n\nDefined in `lib/errors.py`.\n\n```python\nclass DarkFactoryError(Exception):\n    def __init__(\n        self,\n        message: str,\n        *,\n        correlation_id: str | None = None,\n        details: dict[str, Any] | None = None,\n    ) -> None:\n```\n\n### Behavior\n\n- stores a human-readable `message`\n- optionally carries `correlation_id`\n- optionally carries structured `details`\n- `__str__()` returns `message`\n\nThis makes it safe to raise across both `lib/` and `api/` code without coupling callers to transport-specific error handling.\n\n### Typical usage\n\nUse `DarkFactoryError` subclasses when the failure is part of the domain contract and should be understood by higher layers.\n\n## API/domain exceptions\n\nDefined in `api/errors.py`, all inheriting from `DarkFactoryError`.\n\n### Lookup failures\n\n- `TaskNotFoundError` — task lookup failed\n- `ArtifactNotFoundError` — artifact lookup failed\n- `AgentNotFoundError` — agent config lookup failed\n- `ToolNotFoundError` — DarkFactory-owned tool lookup failed\n- `SkillNotFoundError` — discovered workspace skill lookup failed\n\n### Validation and state failures\n\n- `InvalidStateTransitionError` — invalid task lifecycle transition\n- `InvalidTaskRequestError` — task request violates service-owned constraints\n- `ConfigurationError` — startup or reload configuration is invalid\n\n### Execution and safety failures\n\n- `ExecutionScopeError` — attempted read/write outside the task clone\n- `MemoryGovernanceError` — governed workspace memory rules were violated\n- `WorkerBusyError` — single worker is already occupied\n\n### Integration and parsing failures\n\n- `LLMParseError` — structured 
LLM output could not be parsed safely\n- `AgentError` — agent integration boundary failed\n- `SkillParseError` — workspace skill definition could not be parsed safely\n\n## How exceptions propagate\n\nThese exceptions are raised deep in service or infrastructure flows and bubble up as domain failures.\n\n```mermaid\nflowchart TD\n    A[Route/Service] --> B[Domain service]\n    B --> C[Registry / Graph / Scope / Memory]\n    C --> D[Domain exception]\n    D --> A\n```\n\nExamples from current execution flows:\n\n- `ExecutionScopeError`\n  - raised when task plan resolution attempts to access a path outside the clone root\n  - flow includes `get_task_route -> get_task_response -> _resolve_task_plan -> _read_plan_from_clone -> get_task_clone_path -> _ensure_within_root`\n\n- `ToolNotFoundError`\n  - raised during graph construction when an agent references a missing DarkFactory-owned tool\n  - flow includes `run_task_graph` or `inspect_task_recovery` through `resolve_registered_agent_config` and `api/tools/registry.py:get`\n\n- `SkillParseError`\n  - raised during skill discovery when frontmatter parsing fails\n\n- `MemoryGovernanceError`\n  - raised when governed memory writes violate path rules\n\nThese are intentionally domain-named exceptions rather than raw `ValueError`, filesystem errors, or parser errors.\n\n---\n\n# Model package export surface\n\n`api/models/__init__.py` re-exports the public model types used by the rest of the codebase.\n\nThis file is the package-level import surface for:\n\n- agent configuration types and helpers\n- artifact models\n- repository context models\n- review models\n- verification models\n- task models\n\nContributors should update `__all__` when adding new public models intended for package-level import.\n\n---\n\n# Agent and tool configuration models\n\nDefined in `api/models/agents.py`.\n\nThese models describe the configuration contract between DarkFactory and the agent/tool registry layer.\n\n## Type aliases and 
constants\n\n### `AgentToolPermission`\n\n```python\nLiteral[\"read\", \"write\", \"admin\"]\n```\n\nRepresents permission levels for DarkFactory-owned tools.\n\n### `AgentBackendType`\n\n```python\nLiteral[\"filesystem\", \"shell\"]\n```\n\nRepresents the execution backend for an agent.\n\n### `DEEPAGENTS_BUILTIN_TOOL_NAMES`\n\nA frozen set of built-in DeepAgents tool names:\n\n- `edit_file`\n- `execute`\n- `glob`\n- `grep`\n- `ls`\n- `read_file`\n- `task`\n- `write_file`\n- `write_todos`\n\nThese are always present and must not be listed in `AgentConfig.tools`.\n\n### Permission ordering\n\n`_PERMISSION_ORDER` defines the hierarchy:\n\n- `read` = 0\n- `write` = 1\n- `admin` = 2\n\n## Permission helpers\n\n### `normalize_permission_level(level: str) -> AgentToolPermission`\n\nNormalizes and validates a permission string by:\n\n- trimming whitespace\n- lowercasing\n- checking membership in `_PERMISSION_ORDER`\n\nRaises `ValueError` for invalid values.\n\nExamples:\n\n- `\" read \"` -> `\"read\"`\n- `\"WRITE\"` -> `\"write\"`\n- `\"owner\"` -> `ValueError`\n\n### `permission_allows(granted_level, required_level) -> bool`\n\nReturns whether the granted permission satisfies the required permission.\n\nInternally it calls `normalize_permission_level()` for both arguments.\n\nExamples:\n\n- `permission_allows(\"admin\", \"write\")` -> `True`\n- `permission_allows(\"write\", \"read\")` -> `True`\n- `permission_allows(\"read\", \"admin\")` -> `False`\n\n## `ToolPermission`\n\nImmutable permission declaration for a DarkFactory-owned tool capability.\n\nFields:\n\n- `name: str`\n- `level: AgentToolPermission`\n\nValidation:\n\n- `name` is stripped and must not be empty\n- `level` is normalized through `normalize_permission_level`\n\nBecause `model_config = ConfigDict(frozen=True)`, instances are immutable after creation.\n\n## `ToolConfig`\n\nImmutable configuration for a DarkFactory-owned tool.\n\nFields:\n\n- `name: str`\n- `description: str`\n- `permissions: 
list[ToolPermission]`\n\nValidation rules:\n\n- `name` and `description` are stripped and must not be empty\n- `permissions` must contain at least one entry\n- permission names must be unique within the tool config\n\nThis model is used by tool registry code; the call graph shows `api/tools/git.py` constructing `ToolPermission` and `ToolConfig`.\n\n## `CompositeConfig`\n\nImmutable configuration for a composite agent composed of sub-agents.\n\nFields:\n\n- `agents: list[str]`\n- `aggregator: str`\n\nValidation rules:\n\n- `agents` must contain at least one non-empty name\n- agent names are stripped\n- agent names must be unique\n- `aggregator` is stripped and must not be empty\n\n## `AgentConfig`\n\nImmutable structured configuration for a single agent role.\n\nFields:\n\n- `name: str`\n- `model: str`\n- `prompt_template: str`\n- `tools: list[str] = []`\n- `tool_permissions: dict[str, AgentToolPermission] = {}`\n- `backend: AgentBackendType`\n- `composite: CompositeConfig | None = None`\n\n### Field validation\n\n#### `name`, `model`, `prompt_template`\n\nAll are stripped and must not be empty.\n\n#### `tools`\n\nValidation enforces:\n\n- no empty tool names after stripping\n- uniqueness\n- no DeepAgents built-ins listed explicitly\n\nThat last rule is important: `AgentConfig.tools` is only for DarkFactory-owned registry tools. 
Built-ins are implicitly available.\n\n#### `tool_permissions`\n\nValidation enforces:\n\n- permission map keys are stripped and must not be empty\n- permission values are normalized via `normalize_permission_level`\n\n### Cross-field validation\n\nThe `validate_tool_permissions()` model validator enforces several invariants:\n\n#### Every declared tool must have a permission entry\n\nIf a tool appears in `tools` but not in `tool_permissions`, validation fails.\n\n#### No undeclared permission entries\n\nIf `tool_permissions` contains a tool not listed in `tools`, validation fails.\n\n#### Composite model sentinel rules\n\n- if `composite is not None`, then `model` must be `\"composite\"`\n- if `model == \"composite\"`, then `composite` must be provided\n\nThis prevents partially configured composite agents.\n\n### Integration points\n\nThe call graph shows `api/agents/registry.py` constructing `AgentConfig`. That makes this model the validation boundary for agent registry loading.\n\n### Example pattern\n\n```python\nAgentConfig(\n    name=\"planner\",\n    model=\"gpt-4.1\",\n    prompt_template=\"planner.md\",\n    tools=[\"git\"],\n    tool_permissions={\"git\": \"write\"},\n    backend=\"shell\",\n)\n```\n\nComposite example:\n\n```python\nAgentConfig(\n    name=\"meta-planner\",\n    model=\"composite\",\n    prompt_template=\"meta.md\",\n    tools=[],\n    tool_permissions={},\n    backend=\"shell\",\n    composite=CompositeConfig(\n        agents=[\"planner_a\", \"planner_b\"],\n        aggregator=\"merge_reviews\",\n    ),\n)\n```\n\n---\n\n# Artifact models\n\nDefined in `api/models/artifacts.py`.\n\nArtifacts are durable outputs associated with tasks.\n\n## `ArtifactType`\n\n```python\nLiteral[\"review\", \"code_review\", \"result\", \"verification\"]\n```\n\nThe inclusion of `\"verification\"` is important because verification output is persisted as an artifact type.\n\n## `ArtifactRecord`\n\nImmutable durable artifact row returned from the repository 
layer.\n\nFields:\n\n- `artifact_id: str`\n- `task_id: str`\n- `type: ArtifactType`\n- `version: int = Field(ge=1)`\n- `agent: str`\n- `content: str`\n- `metadata: JsonObject | None = None`\n- `created_at: datetime`\n\n## `ArtifactCreate`\n\nInsert payload for creating an artifact row.\n\nSame core fields as `ArtifactRecord`, except:\n\n- no `created_at`\n- `version` defaults to `1`\n\n## `ArtifactUpdate`\n\nPartial update model.\n\nAll mutable fields are optional:\n\n- `task_id`\n- `type`\n- `version`\n- `agent`\n- `content`\n- `metadata`\n\n## `ArtifactResponse`\n\nHTTP response model for artifact queries.\n\nIt currently subclasses `ArtifactRecord` directly, so the API response shape matches the durable record shape.\n\n---\n\n# Review models\n\nDefined in `api/models/review.py`.\n\nThese models represent structured reviewer output used in planning and implementation loops.\n\n## `ReviewStatus`\n\n```python\nLiteral[\"APPROVED\", \"NEEDS_REVISION\", \"REJECTED\"]\n```\n\n## `ReviewRiskLevel`\n\n```python\nLiteral[\"low\", \"medium\", \"high\", \"critical\"]\n```\n\n## `ReviewFeedback`\n\nImmutable structured review result.\n\nFields:\n\n- `status: ReviewStatus`\n- `risk_level: ReviewRiskLevel`\n- `feedback: str = Field(min_length=1)`\n- `suggestions: list[str] = Field(default_factory=list)`\n\nThis model is embedded in runtime graph state:\n\n- `PlanningState.review`\n- `ImplementationState.code_review`\n\n---\n\n# Task models\n\nDefined in `api/models/tasks.py`.\n\nThis is the largest model group because it covers:\n\n- persistence\n- HTTP request/response contracts\n- normalized task context\n- mutable graph execution state\n\n## Shared JSON aliases\n\n### `JsonValue`\n\n```python\nJsonValue: TypeAlias = object\n```\n\n### `JsonObject`\n\n```python\nJsonObject: TypeAlias = dict[str, JsonValue]\n```\n\nThese are intentionally broad. 
They provide a typed placeholder for arbitrary JSON-like payloads without deeply constraining schema at this layer.\n\n## Task enum-like aliases\n\n### `TaskType`\n\n```python\nLiteral[\"plan\", \"implement\"]\n```\n\n### `TaskStatus`\n\n```python\nLiteral[\"running\", \"succeeded\", \"failed\"]\n```\n\n### `PublicationStatus`\n\n```python\nLiteral[\n    \"pending\",\n    \"published\",\n    \"pr_open\",\n    \"awaiting_human_review\",\n    \"blocked\",\n    \"merged\",\n    \"failed\",\n]\n```\n\n### `DebugHeadSource`\n\n```python\nLiteral[\"clone\", \"target_repo\"]\n```\n\nUsed by `TaskDebugResponse` to indicate where the reported head commit came from.\n\n## `GitHubTaskRequest`\n\nOptional GitHub linkage supplied at task creation time.\n\nFields:\n\n- `repo: str`\n- `issue: int | None = Field(default=None, ge=1)`\n- `pr: int | None = Field(default=None, ge=1)`\n\nCross-field validation:\n\n- at least one of `issue` or `pr` must be provided\n\nThis prevents meaningless GitHub linkage objects like `{repo: \"...\", issue: null, pr: null}`.\n\n## Persistence models\n\n## `TaskRecord`\n\nImmutable durable task row returned from the repository layer.\n\nKey fields include:\n\n- identifiers: `task_id`, `thread_id`\n- task definition: `type`, `description`, `context`\n- execution state: `status`, `branch_name`, `plan_path`\n- publication state: `publication_status`, `published_at`, `publication_error`\n- GitHub linkage: `github_repo`, `github_issue_number`, `github_pr_number`\n- lineage: `origin_task_id`, `follow_up_to_task_id`, `follow_up_trigger_key`\n- outcome: `result_type`, `result`, `error`\n- timestamps: `created_at`, `updated_at`\n\n### Serialization aliases\n\n`github_issue_number` and `github_pr_number` use:\n\n- `serialization_alias=\"github_issue\"`\n- `serialization_alias=\"github_pr\"`\n\nThis means serialized output can expose API-friendly names while the Python model keeps explicit internal field names.\n\n### `populate_by_name=True`\n\n`TaskRecord` 
sets:\n\n```python\nmodel_config = ConfigDict(frozen=True, populate_by_name=True)\n```\n\nThis allows population using field names even when aliases exist.\n\n## `TaskCreate`\n\nInsert payload for creating a task row.\n\nIt mirrors most of `TaskRecord` but omits repository-managed timestamps and provides defaults such as:\n\n- `status = \"running\"`\n- `publication_status = \"pending\"`\n\n## `TaskUpdate`\n\nPartial update model for service-owned task mutations.\n\nEvery field is optional, allowing targeted updates without reconstructing the full record.\n\n## Request/response models\n\n## `TaskRequest`\n\nHTTP request payload for creating a task.\n\nFields:\n\n- `type: TaskType`\n- `description: str`\n- `context: JsonObject | None = None`\n- `github: GitHubTaskRequest | None = None`\n- `follow_up_to_task_id: str | None = None`\n- `reuse_branch: str | None = None`\n\nCross-field validation:\n\n- `reuse_branch` is only allowed when `follow_up_to_task_id` is set\n\nThis encodes a service-level invariant directly in the request contract.\n\n## `TaskFollowUpRequest`\n\nOperator request payload for creating blocked-PR follow-up work.\n\nFields:\n\n- `description: str`\n\n## `TaskResponse`\n\nHTTP response model for task queries.\n\nIt extends `TaskRecord` with resolved/aggregated data:\n\n- `plan: TaskPlanResponse | None = None`\n- `artifacts: list[TaskArtifactSummary] = []`\n\nThis is the API-facing enriched task view.\n\n## `TaskDebugResponse`\n\nBest-effort operator snapshot for in-flight or completed tasks.\n\nAdditional fields include:\n\n- graph execution info: `current_node`, `next_nodes`, `graph_status`\n- artifact pointers: `latest_artifact_ids`, `latest_execution_summary_artifact_id`, `latest_execution_summary_node_name`\n- clone/debug info: `clone_present`, `clone_path`, `head_commit`, `head_source`\n- checkpoint info: `checkpoint_available`, `checkpoint_created_at`, `checkpoint_step`, `checkpoint_error`\n\nThis model is operational rather than purely 
domain-persistent.\n\n## Response helper models\n\n### `TaskPlanResponse`\n\nResolved plan details for task query responses.\n\nFields:\n\n- `path: str`\n- `iterations: int = Field(ge=0)`\n- `content: str`\n\n### `TaskArtifactSummary`\n\nArtifact metadata surfaced on task query responses.\n\nFields:\n\n- `artifact_id: str`\n- `type: str`\n- `version: int = Field(ge=1)`\n- `agent: str`\n- `created_at: datetime`\n\n## Structured task context\n\n## `TaskContext`\n\nStructured task context passed into graph state.\n\nFields:\n\n- `base_branch: str = \"main\"`\n- `relevant_files: list[str] = []`\n- `extra: dict[str, str] = {}`\n\nThis is the normalized shape expected by graph execution code.\n\n## `normalize_task_context(context: JsonObject | None) -> JsonObject`\n\nValidates and normalizes arbitrary task context into the `TaskContext` schema, then returns a dumped dictionary.\n\n```python\nreturn TaskContext.model_validate(context or {}).model_dump()\n```\n\nUse this when accepting loose JSON input but wanting a stable internal shape with defaults applied.\n\n## Runtime graph state models\n\nThese models are not persistence rows. 
They represent mutable execution state owned by LangGraph-style workflows.\n\n## `PlanningState`\n\nFields:\n\n- task identity: `task_id`, `task_description`\n- normalized context: `context: TaskContext`\n- branch/plan location: `branch_name`, `plan_path`\n- GitHub context: `github_issue`, `github_pr`\n- repository context: `repo_context`\n- execution workspace: `clone_path`\n- outputs: `plan`, `review`, `result`\n- loop control: `iteration`, `max_iterations`\n- status:\n  - `\"in_progress\"`\n  - `\"approved\"`\n  - `\"rejected\"`\n  - `\"max_iterations\"`\n  - `\"terminal_error\"`\n\nNotable embedded models:\n\n- `IssueContext` and `PRContext` from `api.github.context`\n- `RepositoryContext`\n- `ReviewFeedback`\n\n## `ImplementationState`\n\nExtends `PlanningState` with implementation-specific loop state.\n\nAdditional fields:\n\n- `code_review: ReviewFeedback | None = None`\n- `last_reviewed_commit: str | None = None`\n- `code_iteration: int = Field(default=0, ge=0)`\n- `max_code_iterations: int = Field(default=10, ge=1)`\n- `verification: VerificationResult | None = None`\n- `verification_baseline_failures: list[str] = []`\n- `last_verified_commit: str | None = None`\n- `code_attempt: int = Field(default=0, ge=0)`\n- `max_code_attempts: int = Field(default=10, ge=1)`\n\n### Important loop semantics\n\nThe class docstring distinguishes two counters:\n\n- `code_iteration`\n  - counts completed code-review iterations only\n  - incremented by the code-review node\n\n- `code_attempt`\n  - counts total implementation passes through the code/verify/review cycle\n  - incremented at code-node entry\n  - includes retries caused by verification failure or review-requested revision\n\nTermination occurs as soon as either of these limits is reached:\n\n- `max_code_attempts`\n- `max_code_iterations`\n\nThis distinction matters when changing graph behavior or retry logic.\n\n---\n\n# Verification models\n\nDefined in `api/models/verification.py`.\n\nThese models capture deterministic 
verification configuration and the result of a single verification run.\n\n## `VerificationConfig`\n\nImmutable deployment-owned verification command configuration.\n\nFields:\n\n- `command: list[str] = [\"python\", \"-m\", \"pytest\"]`\n- `timeout_seconds: int = 600`\n- `max_output_bytes: int = 20000`\n\nValidation:\n\n- `command` must have at least one element\n- `timeout_seconds >= 1`\n- `max_output_bytes >= 1`\n\nThe docstring notes that this is deployment-owned in the current phase; per-task and YAML-based verification config is deferred.\n\n### Usage in the codebase\n\nThe call graph shows this model is used heavily by verification runner tests, which confirms it is the configuration boundary for verification execution.\n\n## `VerificationStatus`\n\n```python\nLiteral[\"passed\", \"failed\", \"error\", \"terminal_error\", \"timeout\"]\n```\n\nThese statuses distinguish normal test failure from infrastructure or startup failure.\n\n## `VerificationResult`\n\nImmutable structured outcome of a single verification run, anchored to a commit.\n\nFields:\n\n- `status: VerificationStatus`\n- `command: list[str]`\n- `exit_code: int | None = None`\n- `summary: str`\n- `output_excerpt: str`\n- `failing_tests: list[str] = []`\n- `baseline_only_failures: bool = False`\n- `verified_head_commit: str`\n- `duration_seconds: float = Field(ge=0.0)`\n- `attempt: int = Field(ge=1)`\n\n### Semantics\n\n- `status` captures the high-level outcome\n- `command` records what was executed\n- `exit_code` may be absent for startup/terminal failures\n- `summary` is the concise human-readable result\n- `output_excerpt` stores bounded execution output\n- `failing_tests` identifies failing tests when available\n- `baseline_only_failures` distinguishes pre-existing failures from regressions\n- `verified_head_commit` ties the result to a specific repository state\n- `attempt` tracks which verification pass produced the result\n\n### Integration points\n\n`VerificationResult` is embedded 
in:\n\n- `ImplementationState.verification`\n\nand verification artifacts can be persisted using:\n\n- `ArtifactType = \"verification\"`\n\n---\n\n# Validation patterns used across the module\n\n## Frozen models for durable or configuration data\n\nSeveral models use:\n\n```python\nmodel_config = ConfigDict(frozen=True)\n```\n\nThis is applied to contracts that should be immutable after validation, including:\n\n- `ToolPermission`\n- `ToolConfig`\n- `CompositeConfig`\n- `AgentConfig`\n- `ArtifactRecord`\n- `ReviewFeedback`\n- `TaskRecord`\n- `VerificationConfig`\n- `VerificationResult`\n\nThis reduces accidental mutation after construction.\n\n## Field-level normalization\n\nExamples:\n\n- stripping whitespace from names and descriptions\n- lowercasing permission levels\n- rejecting empty strings after normalization\n\n## Cross-field invariants\n\nExamples:\n\n- `GitHubTaskRequest` requires `issue` or `pr`\n- `TaskRequest` only allows `reuse_branch` with `follow_up_to_task_id`\n- `AgentConfig` requires exact alignment between `tools` and `tool_permissions`\n- `AgentConfig` enforces the `\"composite\"` sentinel/model pairing\n\n---\n\n# How this module connects to the rest of the codebase\n\n## Registry and configuration loading\n\n- `api/agents/registry.py` constructs `AgentConfig`\n- `api/tools/git.py` constructs `ToolConfig` and `ToolPermission`\n\nThese models are the validation boundary for startup-loaded or registry-defined configuration.\n\n## Task execution and graph state\n\n- planning and implementation graphs use `PlanningState` and `ImplementationState`\n- review nodes produce `ReviewFeedback`\n- verification nodes produce `VerificationResult`\n\nThese runtime models carry structured state between graph nodes.\n\n## Persistence and API responses\n\n- repositories return `TaskRecord` and `ArtifactRecord`\n- routes/services expose `TaskResponse` and `ArtifactResponse`\n- `TaskUpdate` and `ArtifactUpdate` support partial service-owned mutations\n\n## Domain 
error propagation\n\nService and infrastructure code raise `api/errors.py` exceptions to signal domain failures upward. Current flows show:\n\n- execution scope checks raising `ExecutionScopeError`\n- tool registry lookups raising `ToolNotFoundError`\n- skill discovery raising `SkillParseError`\n- memory governance raising `MemoryGovernanceError`\n\nThis keeps route and service code working with stable semantic failures instead of raw implementation exceptions.\n\n---\n\n# Contributor guidance\n\n## When to add a new model\n\nAdd a new model when one of these is true:\n\n- a request/response shape differs from persistence shape\n- runtime graph state needs structured fields\n- a subsystem boundary needs validation and normalization\n- a domain concept is currently represented as unstructured dicts or strings\n\nPrefer separate models over overloading one model for persistence, transport, and runtime state.\n\n## When to add a new exception\n\nAdd a new `DarkFactoryError` subclass when callers need to distinguish a failure category in a meaningful way.\n\nGood candidates:\n\n- lookup failures\n- policy/governance violations\n- invalid state transitions\n- integration boundary failures\n\nAvoid adding exceptions that merely mirror low-level implementation details unless they represent a stable domain concept.\n\n## Validation best practices in this module\n\n- use `Literal[...]` for closed sets of values\n- use `Field(...)` for numeric and length constraints\n- use `field_validator` for normalization of individual fields\n- use `model_validator(mode=\"after\")` for cross-field rules\n- freeze models that represent validated configuration or durable records\n\n## Package exports\n\nIf a new model should be imported from `api.models`, update:\n\n- `api/models/__init__.py` imports\n- `api/models/__init__.py::__all__`\n\nThis file is the public aggregation point for model consumers.\n\n---\n\n# Quick reference\n\n## Core exceptions\n\n- `DarkFactoryError`\n- 
`TaskNotFoundError`\n- `ArtifactNotFoundError`\n- `InvalidStateTransitionError`\n- `ExecutionScopeError`\n- `LLMParseError`\n- `ConfigurationError`\n- `AgentError`\n- `AgentNotFoundError`\n- `ToolNotFoundError`\n- `SkillNotFoundError`\n- `SkillParseError`\n- `MemoryGovernanceError`\n- `WorkerBusyError`\n- `InvalidTaskRequestError`\n\n## Core task models\n\n- `TaskCreate`\n- `TaskUpdate`\n- `TaskRecord`\n- `TaskRequest`\n- `TaskResponse`\n- `TaskDebugResponse`\n- `TaskContext`\n- `PlanningState`\n- `ImplementationState`\n\n## Core artifact/review/verification models\n\n- `ArtifactCreate`\n- `ArtifactUpdate`\n- `ArtifactRecord`\n- `ArtifactResponse`\n- `ReviewFeedback`\n- `VerificationConfig`\n- `VerificationResult`\n\n## Core agent/tool models\n\n- `ToolPermission`\n- `ToolConfig`\n- `CompositeConfig`\n- `AgentConfig`\n- `normalize_permission_level`\n- `permission_allows`","git-operations-and-workspace-execution":"# Git Operations and Workspace Execution\n\n# Git Operations and Workspace Execution\n\nThe `lib.git` package provides the service’s async interface to the Git CLI. 
It is responsible for:\n\n- cloning task workspaces\n- creating and resetting branches\n- managing detached worktrees\n- staging and committing changes\n- generating diffs\n- pushing branches or explicit refspecs\n- reading repository state without checking out files\n- cleaning up task clones\n- normalizing Git execution environment and authentication behavior\n\nThis module is intentionally thin: `GitClient` delegates almost all behavior to the `git` executable and wraps failures in a single domain-specific exception, `GitOperationError`.\n\n## Public API\n\n`lib/git/__init__.py` exports:\n\n- `GitClient`\n- `GitOperationError`\n\n## Core class: `GitClient`\n\n`GitClient` is defined in `lib/git/client.py` and acts as an async wrapper around subprocess-based Git commands.\n\n### Construction\n\n```python\nclient = GitClient()\n```\n\nBy default it uses `git` from `PATH`, but a different executable can be injected:\n\n```python\nclient = GitClient(git_executable=\"/usr/bin/git\")\n```\n\nThis is useful for tests or controlled runtime environments.\n\n---\n\n## Execution model\n\nAll Git operations eventually flow through `GitClient._run()`. That method:\n\n1. starts an async subprocess with `asyncio.create_subprocess_exec`\n2. sets the working directory via `cwd`\n3. builds a Git-specific environment with `_build_git_env(...)`\n4. captures `stdout` and `stderr`\n5. raises `GitOperationError` on non-zero exit\n6. 
returns trimmed `stdout` on success\n\n### Central command path\n\n```mermaid\nflowchart TD\n    A[GitClient public method] --> B[_run]\n    B --> C[_extract_push_remote]\n    B --> D[_build_git_env]\n    D --> E[_configure_https_push_auth]\n    E --> F[_build_https_push_auth_config]\n    E --> G[_append_git_config_env]\n    B --> H[git subprocess]\n    H --> I[stdout result or GitOperationError]\n```\n\nThis centralization is important because environment setup, auth injection, and error formatting are applied consistently across all Git commands.\n\n---\n\n## Repository lifecycle operations\n\n### `clone(source_repo: Path, destination: Path) -> Path`\n\nClones a repository into `destination`.\n\nBehavior:\n\n- if `destination` already exists, it is removed first via `cleanup_clone()`\n- ensures `destination.parent` exists\n- runs:\n\n```bash\ngit clone <source_repo> <destination>\n```\n\nNotes:\n\n- `source_repo` is resolved to an absolute path before cloning\n- this method is used in task clone setup flows in tests and likely in task workspace preparation code\n- cleanup-before-clone makes clone creation idempotent for a known destination path\n\n### `cleanup_clone(clone_path: Path) -> None`\n\nDeletes a clone directory if it exists.\n\nImplementation details:\n\n- no-op if the path does not exist\n- uses `asyncio.to_thread(shutil.rmtree, clone_path)` so filesystem deletion does not block the event loop\n\n### `cleanup_orphan_clones(clone_root: Path, active_task_ids: set[str], retained_task_ids: set[str] | None = None) -> list[Path]`\n\nRemoves clone directories under `clone_root` that are not associated with active or retained tasks.\n\nAn entry under `clone_root` is preserved if any of the following is true:\n\n- it is not a directory\n- its name is in `active_task_ids`\n- its name is in `retained_task_ids`\n\nReturns the list of removed paths.\n\nThis is the module’s bulk workspace cleanup primitive. 
It assumes clone directory names correspond to task IDs.\n\n---\n\n## Branch and worktree operations\n\n### `create_branch(repo_path: Path, branch_name: str) -> None`\n\nCreates and switches to a new branch:\n\n```bash\ngit checkout -b <branch_name>\n```\n\nUse this when the branch must not already exist.\n\n### `checkout_branch_from_ref(repo_path: Path, branch_name: str, *, start_point: str) -> None`\n\nResets or creates `branch_name` at `start_point` and checks it out:\n\n```bash\ngit checkout -B <branch_name> <start_point>\n```\n\nUse this when branch state should be forced to a known ref.\n\nThis is the safer primitive for “make branch X point at commit/ref Y” workflows.\n\n### `get_current_branch(repo_path: Path) -> str`\n\nReturns the current branch name using:\n\n```bash\ngit rev-parse --abbrev-ref HEAD\n```\n\n### `get_head_commit(repo_path: Path) -> str`\n\nReturns the SHA for `HEAD`:\n\n```bash\ngit rev-parse HEAD\n```\n\n### `get_ref_commit(repo_path: Path, ref_name: str) -> str`\n\nReturns the SHA for an arbitrary ref:\n\n```bash\ngit rev-parse <ref_name>\n```\n\nThis is used by task and publication flows that need to reconcile branch state without mutating the workspace.\n\n---\n\n## Worktree support\n\n### `add_detached_worktree(repo_path: Path, worktree_path: Path, *, ref_name: str) -> None`\n\nCreates a detached worktree at a specific ref:\n\n```bash\ngit worktree add --detach <worktree_path> <ref_name>\n```\n\nThis is useful when code needs a filesystem view of a commit or branch without switching the main clone’s checked-out branch.\n\n### `remove_worktree(repo_path: Path, worktree_path: Path, *, force: bool = False) -> None`\n\nRemoves a previously created worktree:\n\n```bash\ngit worktree remove <worktree_path>\n```\n\nIf `force=True`, adds `--force`.\n\n---\n\n## Commit and diff operations\n\n### `commit_all(repo_path: Path, message: str, *, allow_empty: bool = False, body: str | None = None) -> str`\n\nStages all changes and creates a 
commit.\n\nSequence:\n\n1. logs `\"Git commit started.\"`\n2. runs `git add -A`\n3. builds a `git commit` command\n4. optionally adds:\n   - a second `-m <body>` if `body` is provided\n   - `--allow-empty` if `allow_empty=True`\n5. resolves the resulting commit SHA with `get_head_commit()`\n6. logs `\"Git commit created.\"` with `head_commit`\n7. returns the commit SHA\n\nExample command shapes:\n\n```bash\ngit commit -m \"subject\"\ngit commit -m \"subject\" -m \"body\"\ngit commit -m \"subject\" --allow-empty\n```\n\nLogging context includes:\n\n- `repo_path`\n- `commit_subject`\n- `has_commit_body`\n- `head_commit` on success\n\nOn any exception, it logs `\"Git commit failed.\"` and re-raises.\n\n### `diff(repo_path: Path, *, base_ref: str | None = None, head_ref: str | None = None, paths: list[str] | None = None) -> str`\n\nReturns `git diff` output.\n\nRevision selection is delegated to `_build_revision_range(...)`.\n\nSupported forms:\n\n- no refs: `git diff`\n- both refs: `git diff <base>..<head>`\n- only one ref: `git diff <ref>`\n\nIf `paths` is provided, the command appends `--` and the path list.\n\nExamples:\n\n```python\nawait client.diff(repo_path)\nawait client.diff(repo_path, base_ref=\"main\", head_ref=\"feature\")\nawait client.diff(repo_path, head_ref=\"HEAD\", paths=[\"plan.md\"])\n```\n\n---\n\n## Push operations\n\n### `push(repo_path: Path, *, remote: str = \"origin\", branch_name: str) -> None`\n\nPushes a branch to a remote:\n\n```bash\ngit push <remote> <branch_name>\n```\n\nLogging context includes:\n\n- `repo_path`\n- redacted `remote`\n- `branch_name`\n\nIt logs start, failure, and completion.\n\n### `push_refspec(repo_path: Path, *, remote: str, source_ref: str, destination_ref: str) -> None`\n\nPushes an explicit refspec:\n\n```bash\ngit push <remote> <source_ref>:<destination_ref>\n```\n\nThis is useful when the source and destination names differ or when pushing a non-current ref.\n\nLogging context includes:\n\n- `repo_path`\n- 
redacted `remote`\n- `refspec`\n\nLike `push()`, it logs start/failure/completion.\n\n---\n\n## Read-only repository inspection\n\n### `has_working_tree_changes(repo_path: Path) -> bool`\n\nReturns whether the repository has any staged, unstaged, or untracked changes.\n\nImplementation:\n\n```bash\ngit status --short\n```\n\nAny non-empty output is treated as “has changes”.\n\n### `show_file_at_ref(repo_path: Path, *, ref_name: str, file_path: str) -> str`\n\nReads file contents directly from a ref without checking out that file into the working tree:\n\n```bash\ngit show <ref_name>:<file_path>\n```\n\nThis is an important integration point with task read APIs. The execution flow data shows it being used when resolving task plans from branches in request handling paths.\n\nBecause it goes through `_run()`, it also inherits the module’s environment setup and error handling.\n\n---\n\n## Error handling\n\n## `GitOperationError`\n\nDefined in `lib/git/errors.py`:\n\n```python\nclass GitOperationError(DarkFactoryError):\n    \"\"\"Raised when a git command fails.\"\"\"\n```\n\n`GitClient._run()` raises `GitOperationError` whenever the subprocess exits non-zero.\n\nThe error message includes:\n\n- the redacted command text\n- the working directory\n- stderr output, or a fallback message if stderr is empty\n\nExample shape:\n\n```text\ngit push https://***@github.com/org/repo.git branch failed in /workspace/repo: authentication failed\n```\n\nThis gives callers enough context to diagnose failures while avoiding credential leakage in URLs.\n\n---\n\n## Environment and authentication behavior\n\nA major responsibility of this module is building a safe, predictable environment for Git subprocesses.\n\n## `_build_git_env(push_remote: str | None = None) -> dict[str, str]`\n\nStarts from `os.environ` and applies defaults:\n\n- `GIT_AUTHOR_NAME`\n- `GIT_AUTHOR_EMAIL`\n- `GIT_COMMITTER_NAME`\n- `GIT_COMMITTER_EMAIL`\n\nDefaults are:\n\n- `DarkFactory`\n- 
`darkfactory@local.invalid`\n\nThese are only applied with `env.setdefault(...)`, so explicit environment values win.\n\nIf `push_remote` is provided, `_build_git_env()` also:\n\n- sets `GIT_TERMINAL_PROMPT=0` unless already set\n- attempts HTTPS auth configuration via `_configure_https_push_auth(...)`\n\n### Why auth setup happens in `_run()`\n\n`_run()` always calls:\n\n```python\nenv=_build_git_env(push_remote=_extract_push_remote(args))\n```\n\nThat means every Git command gets the normalized author/committer environment, and push commands additionally get non-interactive auth behavior.\n\nBecause `_extract_push_remote()` only returns a value for `push` commands, auth injection is effectively push-specific.\n\nOne subtle consequence is that non-push commands such as `show_file_at_ref()` still pass through `_build_git_env()`, but without a `push_remote`, so no push auth is configured.\n\n---\n\n## HTTPS push authentication\n\n### `_extract_push_remote(args: tuple[str, ...]) -> str | None`\n\nReturns the remote argument only when the command is a push and has at least two arguments:\n\n- `(\"push\", \"origin\", \"branch\")` → `\"origin\"`\n- `(\"show\", \"main:file.txt\")` → `None`\n\n### `_configure_https_push_auth(env: dict[str, str], *, push_remote: str) -> None`\n\nAttempts to derive an HTTP authorization header for HTTPS pushes and, if successful, injects it into Git config environment variables.\n\n### `_build_https_push_auth_config(push_remote: str) -> tuple[str, str] | None`\n\nBuilds a Git config key/value pair for per-remote HTTP auth.\n\nIt returns `None` unless all of the following are true:\n\n- `GH_TOKEN` exists in the process environment\n- `push_remote` parses as an `http` or `https` URL\n- the URL has a network location\n- the URL does not already contain user info (`@` in netloc)\n\nWhen valid, it produces:\n\n- key: `http.<scheme://host/path>.extraheader`\n- value: `AUTHORIZATION: basic <base64(x-access-token:GH_TOKEN)>`\n\nThis uses Git’s 
`http.<url>.extraheader` mechanism rather than embedding credentials in the remote URL.\n\n### `_append_git_config_env(env: dict[str, str], *, key: str, value: str) -> None`\n\nAppends one config entry using Git’s `GIT_CONFIG_COUNT`, `GIT_CONFIG_KEY_<n>`, and `GIT_CONFIG_VALUE_<n>` environment convention.\n\nThis allows multiple config entries to coexist without mutating global Git config files.\n\n### Practical implications\n\n- HTTPS pushes can authenticate using `GH_TOKEN` without rewriting remotes\n- interactive prompts are disabled for push commands\n- remotes that already contain credentials are left untouched\n- non-URL remotes such as `origin` do not get HTTPS header injection directly; Git will resolve them through repository config as usual\n\n---\n\n## Redaction and logging safety\n\n### `_redact_git_arg(arg: str) -> str`\n\nRedacts credentials embedded in URL-like command arguments.\n\nBehavior:\n\n- if the string does not contain `://`, it is returned unchanged\n- if URL parsing fails, it is returned unchanged\n- if the URL has no `@` in `netloc`, it is returned unchanged\n- otherwise, user info is replaced with `***`\n\nExample:\n\n```text\nhttps://token@github.com/org/repo.git\n```\n\nbecomes:\n\n```text\nhttps://***@github.com/org/repo.git\n```\n\nThis function is used in:\n\n- `_run()` when formatting failed command text\n- `push()` logging context\n- `push_refspec()` logging context\n\nThat keeps logs and exceptions useful without leaking secrets.\n\n---\n\n## Helper behavior\n\n## `_build_revision_range(base_ref, head_ref)`\n\nUsed by `diff()` to normalize revision arguments.\n\nRules:\n\n- both `None` → `None`\n- both present → `\"<base_ref>..<head_ref>\"`\n- one present → that single ref\n\nThis keeps `diff()` simple while preserving Git’s native revision syntax.\n\n---\n\n## Integration with workspace execution\n\nAlthough `lib.git` is a standalone package, it is tightly connected to task execution and workspace management elsewhere in the 
codebase.\n\n### Task clone lifecycle\n\nTests show `GitClient` participating in setup flows that:\n\n- prepare task clones\n- create task branches\n- commit generated artifacts such as plans\n- push publication branches\n- inspect branch heads during follow-up and reconciliation flows\n\nThe clone cleanup methods support long-lived task workspace directories where stale clones must be removed safely.\n\n### Read paths for task data\n\nExecution flow data shows `show_file_at_ref()` being used from task retrieval code:\n\n- route handler\n- task service\n- plan resolution\n- branch file read\n\nThis allows the service to read branch content directly from Git without switching the working tree.\n\n### Publication and reconciliation\n\n`get_ref_commit()` and `get_head_commit()` are used in publication-related flows to compare or recover branch state. `push()` and `push_refspec()` provide the write side of that workflow.\n\n---\n\n# Observability helpers for execution artifacts\n\nThis page also covers `api/observability.py`, which is not part of `lib.git` but is closely related to workspace execution because it records execution outcomes and summaries.\n\nThis file does not execute Git commands, but it helps persist and retrieve artifacts that describe what happened during branch setup, publication, and failure handling.\n\n## Artifact creation\n\n### `create_logged_artifact(executor, artifact) -> ArtifactRecord`\n\nPersists an artifact via `create_artifact(...)` and logs one success event with:\n\n- `artifact_id`\n- `artifact_type`\n\n### `persist_result_artifact(...) 
-> ArtifactRecord | None`\n\nCreates a standardized service-owned `\"result\"` artifact.\n\nBehavior:\n\n- returns `None` immediately if `executor is None`\n- merges caller metadata with required metadata fields via `_build_result_artifact_metadata(...)`\n- writes an `ArtifactCreate` record with:\n  - generated `artifact_id`\n  - `task_id`\n  - `type=\"result\"`\n  - `version`\n  - `agent=\"service\"`\n  - `content=summary`\n\nRequired metadata keys enforced here:\n\n- `node_name`\n- `loop_stage`\n\nOptional metadata keys are documented by constants:\n\n- `iteration`\n- `attempt`\n- `commit_anchor`\n- `branch_name`\n- `base_branch`\n- `publication_status`\n- `github_pr_number`\n- `failure_kind`\n- `head_commit`\n- `mode`\n\nThese constants define the metadata contract expected by downstream consumers.\n\n## Artifact lookup helpers\n\n### `find_latest_result_artifact(artifacts, *, node_name) -> ArtifactRecord | None`\n\nFilters artifacts to `\"result\"` artifacts whose metadata contains the requested `node_name`, then returns the newest by:\n\n1. `version`\n2. `created_at`\n3. `artifact_id`\n\n### `find_latest_execution_summary_artifact(artifacts) -> ArtifactRecord | None`\n\nReturns the newest artifact considered an execution summary, based on `_is_execution_summary_artifact(...)`.\n\n### `_is_execution_summary_artifact(artifact) -> bool`\n\nTreats an artifact as an execution summary when:\n\n- `artifact.type == \"result\"`\n- metadata is a dict\n- `metadata[\"node_name\"]` is in `EXECUTION_SUMMARY_NODE_NAMES`\n  - currently `{\"setup_branch\", \"publication\"}`\n- or `node_name == \"service\"` and `failure_kind` is present\n\nThis supports debug and projection flows that need the latest high-level execution outcome.\n\n---\n\n## Design characteristics\n\n## Thin wrapper, not a Git abstraction layer\n\n`GitClient` does not attempt to model repositories, refs, or index state in Python. 
It exposes a small set of task-oriented operations and relies on the Git CLI for semantics.\n\nBenefits:\n\n- behavior stays close to native Git\n- debugging is straightforward because commands map directly to shell equivalents\n- adding new operations is usually simple\n\nTradeoffs:\n\n- callers need some Git knowledge\n- command composition and ref semantics remain visible at the API boundary\n\n## Async-friendly subprocess usage\n\nAll Git commands are async, which fits service code that may perform repository operations alongside other I/O.\n\nFilesystem deletion is also offloaded with `asyncio.to_thread(...)` where needed.\n\n## Safe-by-default logging\n\nCredential redaction and scoped logging are built into the command path, especially around push operations and commit creation.\n\n## Environment-driven identity and auth\n\nCommit identity and HTTPS push auth are configured through environment variables rather than repository-local config mutation. This keeps execution ephemeral and easier to reason about in containerized or worker-based environments.\n\n---\n\n## Common usage patterns\n\n### Prepare a fresh task clone and branch\n\n```python\nclient = GitClient()\n\nclone_path = await client.clone(source_repo, destination)\nawait client.create_branch(clone_path, \"task/my-task\")\n```\n\n### Reset a branch to a known ref\n\n```python\nawait client.checkout_branch_from_ref(\n    repo_path,\n    \"task/my-task\",\n    start_point=\"origin/main\",\n)\n```\n\n### Commit generated changes\n\n```python\ncommit_sha = await client.commit_all(\n    repo_path,\n    \"Write task plan\",\n    body=\"Generated by planner node.\",\n)\n```\n\n### Push a branch\n\n```python\nawait client.push(\n    repo_path,\n    remote=\"origin\",\n    branch_name=\"task/my-task\",\n)\n```\n\n### Read a file from a branch without checkout\n\n```python\nplan_text = await client.show_file_at_ref(\n    repo_path,\n    ref_name=\"task/my-task\",\n    file_path=\"plan.md\",\n)\n```\n\n### 
Remove stale clones\n\n```python\nremoved = await client.cleanup_orphan_clones(\n    clone_root,\n    active_task_ids={\"task-1\", \"task-2\"},\n    retained_task_ids={\"task-debug\"},\n)\n```\n\n---\n\n## Contribution notes\n\nWhen extending this module:\n\n- prefer adding small task-oriented methods that delegate to `_run()`\n- keep command construction explicit and close to the underlying Git CLI\n- preserve redaction for anything that may contain URLs or credentials\n- route all subprocess execution through `_run()` so environment setup and error handling remain consistent\n- avoid mutating global Git config; prefer environment-based config injection as done by `_append_git_config_env()`\n- maintain async behavior for subprocess and filesystem operations\n\nIf a new command can involve remote URLs or authentication, verify:\n\n- whether `_extract_push_remote()` logic is sufficient\n- whether logs and exceptions redact sensitive values\n- whether the command should disable interactive prompts\n\nIf a new execution step needs durable reporting, `api/observability.py` is the companion layer for persisting result artifacts that summarize what happened in Git-backed workspace execution.","github-integration-and-publication":"# GitHub Integration and Publication\n\n# GitHub Integration and Publication\n\nThis module owns the service-facing GitHub workflow for:\n\n- talking to the GitHub REST API,\n- extracting compact issue/PR context for prompts,\n- publishing task branches and pull requests,\n- reconciling publication state into the database,\n- and posting idempotent task-scoped comments back to GitHub.\n\nIt is split across two layers:\n\n- `lib/github/*`: transport, models, and GitHub-specific errors\n- `api/github/*`: application logic for comments, context building, and publication\n\n## Responsibilities at a glance\n\n### `lib/github`\nLow-level GitHub client surface:\n\n- `GitHubClient` protocol defines the async operations the rest of the app depends on\n- 
`AsyncGitHubClient` implements those operations over `httpx`\n- `Issue`, `PullRequest`, `Comment`, and `Review` are normalized Pydantic models\n- `GitHubAPIError`, `GitHubNotFoundError`, and `GitHubRateLimitError` standardize failure handling\n- `assert_authoritative_repo()` validates repo ownership/configuration\n\n### `api/github/comments.py`\nComment protocol for DarkFactory-owned GitHub comments:\n\n- embeds a stable HTML marker in comment bodies\n- parses and matches existing comments\n- formats comments consistently\n- upserts comments by editing the newest matching comment when possible\n\n### `api/github/context.py`\nBuilds compact GitHub issue/PR summaries for prompt injection:\n\n- filters out bot/system noise\n- sanitizes external text\n- summarizes issue bodies, comments, reviews, and diffs\n- extracts unresolved questions and human change requests\n\n### `api/github/publisher.py`\nPublication orchestration:\n\n- pushes task branches to GitHub\n- reconciles or creates PRs\n- persists PR numbers and publication checkpoints\n- evaluates review state\n- optionally merges PRs\n- publishes plan/status/ready-for-review comments\n\n---\n\n## Architecture\n\n```mermaid\nflowchart TD\n    A[publish_task_to_github] --> B[push branch via GitClient]\n    A --> C[resolve_task_pull_request]\n    C --> D[GitHubClient PR APIs]\n    A --> E[_determine_review_gate]\n    E --> F[list_pr_reviews/get_pull_request]\n    A --> G[_publish_task_comments]\n    G --> H[upsert_task_comment]\n    A --> I[persist_publication_checkpoint]\n```\n\nThe important design choice is that publication is **reconciling**, not fire-and-forget:\n\n- branch pushes are retried by rerunning publication,\n- PR creation is deduplicated by branch pair and persisted PR number,\n- comments are deduplicated by embedded markers,\n- publication status is checkpointed in the task row.\n\n---\n\n# GitHub transport layer (`lib/github`)\n\n## Models\n\n`lib/github/models.py` defines the normalized objects used 
everywhere else:\n\n- `Issue`\n- `PullRequest`\n- `Comment`\n- `Review`\n\nThese models convert GitHub payloads into a stable internal shape. Notable normalization:\n\n- `PullRequest.state` can be `\"merged\"` even though GitHub’s REST payload may report merge state indirectly via `merged_at`\n- `Comment.is_bot` is derived from the GitHub user payload\n- timestamps are parsed into `datetime`\n\nThese models are the contract consumed by `api/github/context.py`, `api/github/comments.py`, and `api/github/publisher.py`.\n\n## Errors\n\n`lib/github/errors.py` defines:\n\n- `GitHubAPIError`: generic GitHub failure\n- `GitHubRateLimitError`: rate limiting / exhausted quota\n- `GitHubNotFoundError`: 404s\n\nThe publisher relies on these exceptions for recovery behavior, especially when checking persisted PR references.\n\n## `GitHubClient` protocol\n\n`lib/github/client.py` exposes a `GitHubClient` `Protocol` so higher-level code can depend on behavior rather than a concrete implementation.\n\nSupported operations include:\n\n- issue reads: `get_issue()`, `list_issues()`\n- PR reads: `get_pull_request()`, `list_pull_requests()`\n- comments/reviews: `list_issue_comments()`, `list_pr_comments()`, `list_pr_reviews()`\n- comment writes: `post_comment()`, `edit_comment()`\n- PR writes: `create_pull_request()`, `merge_pull_request()`\n- labels: `add_labels()`\n\nThis protocol is what `api/github/*` accepts in function signatures, which makes testing straightforward.\n\n## `AsyncGitHubClient`\n\n`AsyncGitHubClient` is the concrete REST implementation.\n\n### Request behavior\n\nAll API methods eventually go through `_request()`:\n\n- sends the HTTP request with `httpx.AsyncClient`\n- retries on `429` and `5xx`\n- uses `Retry-After` when present\n- otherwise computes exponential backoff with jitter via `_compute_retry_delay()`\n- raises typed exceptions through `_raise_api_error()`\n\n### Pagination\n\nList endpoints use `_get_paginated()`:\n\n- follows GitHub `Link` headers via 
`_parse_next_link()`\n- accumulates all pages before returning normalized models\n\n### Payload normalization\n\nResponse payloads are converted by:\n\n- `_build_issue()`\n- `_build_pull_request()`\n- `_build_comment()`\n- `_build_review()`\n\nThese helpers validate payload shape and raise `GitHubAPIError` on malformed responses.\n\n### Repo validation\n\n`assert_authoritative_repo(task_repo, authoritative_repo)` ensures a task is targeting the configured authoritative repository. Comparison is case-insensitive and trims whitespace.\n\nUse this before publication when repo ownership must be enforced.\n\n---\n\n# Comment reconciliation (`api/github/comments.py`)\n\nThis file defines the protocol for DarkFactory-owned comments on issues and PRs.\n\n## Why markers exist\n\nGitHub comments do not provide a native idempotency key. To safely update service-owned comments instead of posting duplicates, each managed comment includes an HTML marker:\n\n```html\n<!-- darkfactory:{task_id}:{comment_type}:v{version} -->\n```\n\nExample:\n\n```html\n<!-- darkfactory:task-123:status_update:v1 -->\n```\n\nThis marker is invisible in rendered GitHub UI but stable in the raw body.\n\n## Supported comment types\n\n`CommentType` is a `Literal` of:\n\n- `\"plan_summary\"`\n- `\"review_outcome\"`\n- `\"status_update\"`\n- `\"pr_ready\"`\n\nTitles are mapped internally by `_COMMENT_TITLES`.\n\n## Core functions\n\n### `DarkFactoryCommentMarker`\nImmutable Pydantic model representing the parsed/rendered marker:\n\n- `task_id: str`\n- `comment_type: CommentType`\n- `version: int = 1`\n\n### `render_comment_marker(marker)`\nReturns the HTML marker string.\n\n### `parse_comment_marker(body)`\nSearches a comment body for a DarkFactory marker and returns `DarkFactoryCommentMarker | None`.\n\nThis is used both for reconciliation and for filtering service-generated comments out of prompt context.\n\n### `format_task_comment(...)`\nBuilds the full GitHub comment body:\n\n- marker\n- markdown 
heading based on `comment_type`\n- normalized content\n- footer with task id, agent, and version\n\nThe footer format is:\n\n```md\n*DarkFactory | Task: {task_id} | Agent: {agent} | v{version}*\n```\n\n### `find_matching_task_comment(comments, task_id, comment_type)`\nFinds the newest matching DarkFactory comment for a given task/type pair.\n\nMatching is based on parsed marker contents, not title/body text. If multiple matches exist, the newest is selected by:\n\n1. `updated_at`\n2. `created_at`\n3. `id`\n\n### `upsert_task_comment(...)`\nImplements idempotent comment publication:\n\n1. format the desired body with `format_task_comment()`\n2. fetch all issue comments with `github_client.list_issue_comments()`\n3. locate an existing matching comment with `find_matching_task_comment()`\n4. if none exists, call `github_client.post_comment()`\n5. otherwise call `github_client.edit_comment()`\n\nNote that PR comments are posted through the issue-comments API surface, which matches GitHub’s issue/PR comment model.\n\n## Important behavior\n\n### Matching ignores version\n`find_matching_task_comment()` matches only on:\n\n- `task_id`\n- `comment_type`\n\nIt does **not** require the existing marker version to match. 
A newer body with a different `version` still edits the existing comment for that task/type.\n\n### Marker-aware filtering elsewhere\n`api/github/context.py` uses `parse_comment_marker()` to exclude DarkFactory-managed comments from issue context, preventing the system from feeding its own generated comments back into prompts.\n\n---\n\n# GitHub context building (`api/github/context.py`)\n\nThis module converts raw GitHub issue/PR data into compact prompt-ready summaries.\n\n## Main entry point\n\n### `load_task_github_context(...)`\nLoads issue and PR context concurrently with `asyncio.gather()`.\n\nInputs:\n\n- `github_client: GitHubClient`\n- `git_client: GitClient`\n- `repo: str`\n- `task_created_at: datetime`\n- `target_repo_path: Path`\n- optional `issue_number`\n- optional `pr_number`\n\nReturns:\n\n```python\ntuple[IssueContext | None, PRContext | None]\n```\n\nIf either number is absent, that side returns `None`.\n\n## Issue context\n\n### `IssueContext`\nContains:\n\n- `number`\n- `title`\n- `body_summary`\n- `labels`\n- `comment_count`\n- `recent_comments`\n- `unresolved_questions`\n\n### `build_issue_context(...)`\nFetches the issue and its comments concurrently, then:\n\n- filters comments through `_should_include_issue_comment()`\n- keeps only recent relevant comments\n- summarizes the issue body\n- extracts unresolved questions/requirements from comments\n\n### Comment filtering rules\n\n`_should_include_issue_comment()` excludes comments when:\n\n- `comment.is_bot` is true\n- author login looks bot-like via `_is_probable_bot_login()`\n- the body contains a DarkFactory marker via `parse_comment_marker()`\n- the comment was authored by `\"darkfactory\"` and predates task creation\n- the sanitized body is empty\n\nThis keeps prompt context focused on human-authored, relevant discussion.\n\n### Unresolved question extraction\n\n`_extract_unresolved_questions()` scans candidate sentences from relevant comments and keeps those that look like:\n\n- 
questions (`\"?\"` present), or\n- requirement-like statements starting with prefixes such as:\n  - `\"please \"`\n  - `\"must \"`\n  - `\"should \"`\n  - `\"need to \"`\n  - `\"do not \"`\n\nResults are summarized, deduplicated, and capped by `MAX_UNRESOLVED_ITEMS`.\n\n## PR context\n\n### `PRContext`\nContains:\n\n- `number`\n- `title`\n- `base_branch`\n- `head_branch`\n- `head_sha`\n- `diff_summary`\n- `review_state`\n- `recent_reviews`\n- `human_change_requests`\n\n### `build_pr_context(...)`\nFetches the PR and reviews concurrently, then:\n\n- removes bot-authored reviews\n- sorts reviews by submission time/id\n- keeps recent reviews\n- computes a diff summary using the local git checkout\n- extracts human change requests\n\n### Human change request extraction\n\n`_extract_human_change_requests()` works from ordered human reviews:\n\n- if a review body contains requirement/question-like sentences, those are summarized\n- otherwise, a `CHANGES_REQUESTED` review contributes a fallback summary:\n  - `\"{author}: requested changes on the PR.\"`\n\nResults are deduplicated and capped by `MAX_CHANGE_REQUESTS`.\n\n### Diff summary generation\n\n`_build_diff_summary()` uses `git_client.diff()` between PR base and head branches and derives:\n\n- changed file list\n- addition count\n- deletion count\n\nIf the diff cannot be computed, it returns a fallback message indicating the branch may not yet be available in the authoritative repository.\n\nThis is intentionally lightweight: it parses unified diff text rather than relying on GitHub file APIs.\n\n## Prompt formatting\n\n### `format_github_context_for_prompt(issue_context, pr_context)`\nRenders one or both contexts into a compact plain-text block for prompt injection.\n\nIf neither exists, returns:\n\n```text\nGitHub context: none.\n```\n\nFormatting is delegated to:\n\n- `_format_issue_context()`\n- `_format_pr_context()`\n\n## Sanitization and summarization\n\nExternal GitHub text is normalized before use.\n\n### 
`_sanitize_external_text(text, max_chars)`\nPerforms:\n\n- removal of control characters except `\\n` and `\\t`\n- whitespace compaction per line\n- trimming\n- truncation with `...` when over limit\n\n### `_summarize_text(text, max_chars)`\nWraps `_sanitize_external_text()` and falls back to `\"(empty)\"`.\n\nThis sanitization is important because issue bodies, comments, and reviews are untrusted external input.\n\n---\n\n# Publication workflow (`api/github/publisher.py`)\n\nThis is the orchestration layer that turns a durable task branch into GitHub-visible publication state.\n\n## Core types\n\n### `PublicationState`\nAllowed states:\n\n- `\"published\"`\n- `\"pr_open\"`\n- `\"awaiting_human_review\"`\n- `\"blocked\"`\n- `\"merged\"`\n- `\"failed\"`\n\nThese are persisted onto the task row.\n\n### `PublicationConfig`\nPublication defaults:\n\n- `auto_push: bool = True`\n- `auto_pr: bool = True`\n- `auto_merge: bool = False`\n- `pr_base_branch: str = \"main\"`\n- `merge_method: Literal[\"merge\", \"squash\", \"rebase\"] = \"squash\"`\n\n### `PublicationResult`\nReturned by `publish_task_to_github()`:\n\n- `publication_status`\n- `publication_error`\n- `github_pr_number`\n\n### `ReviewGateDecision`\nInternal decision object returned by `_determine_review_gate()`:\n\n- `publication_status`\n- `should_merge`\n- `should_post_ready_comment`\n- `blocking_review`\n\n---\n\n## Main entry point: `publish_task_to_github(...)`\n\nThis function publishes an already-durable task branch to GitHub and reconciles task state.\n\n### Inputs\n\n- database executor (`asyncpg.Connection | asyncpg.Pool`)\n- `github_client`\n- `git_client`\n- local repo path\n- `github_push_url`\n- `task: TaskRecord`\n- optional `PublicationConfig`\n\n### High-level flow\n\n1. If `task.github_repo` is not set, or `auto_push` is disabled, return `\"published\"` without GitHub side effects.\n2. Push the task branch with `_push_task_branch_to_github()`.\n3. If `auto_pr` is disabled, return `\"published\"`.\n4. 
Resolve or create the PR with `resolve_task_pull_request()`.\n5. Persist checkpoint `\"pr_open\"`.\n6. Determine review gate with `_determine_review_gate()`.\n7. If configured and allowed, merge the PR.\n8. Persist final publication checkpoint.\n9. Publish GitHub comments with `_publish_task_comments()`.\n10. Return `PublicationResult`.\n\n### Failure handling\n\nThe function is intentionally checkpointed:\n\n- branch push failure → persist `\"failed\"`\n- PR reconcile failure → persist `\"failed\"`\n- review gate lookup failure → keep `\"pr_open\"` and store error\n- merge failure → persist `\"failed\"`\n- comment publication failure → keep final publication status but store error\n\nThis lets recovery code rerun publication without losing progress.\n\n---\n\n## Branch publication\n\n### `_push_task_branch_to_github(...)`\nPushes:\n\n- source ref: `refs/heads/{branch_name}`\n- destination ref: same\n\nusing `git_client.push_refspec()`.\n\nIf `github_push_url` is missing, it raises `GitHubAPIError`.\n\nThis is the bridge from local durable git state to the authoritative GitHub repository.\n\n---\n\n## Pull request reconciliation\n\nPR creation is designed to be idempotent and restart-safe.\n\n### `reconcile_pull_request(...)`\nLooks for an existing open PR for the exact `(head_branch, base_branch)` pair using `find_existing_pull_request()`. 
If found, returns it; otherwise creates one with `github_client.create_pull_request()`.\n\n### `find_existing_pull_request(...)`\nScans open PRs and matches with `_matches_branch_pair()`:\n\n- PR must be `\"open\"`\n- `head_branch` must match\n- `base_branch` must match\n\n### `persist_pull_request_number(...)`\nImmediately persists the reconciled PR number to the task row.\n\nBehavior:\n\n- if the same number is already stored, returns current task unchanged\n- if a different PR number is already stored, raises `GitHubAPIError`\n- otherwise updates the task with `TaskUpdate(github_pr_number=...)`\n\nThis protects against accidental relinking of a task to a different PR.\n\n### `reconcile_and_persist_pull_request(...)`\nConvenience wrapper:\n\n1. reconcile/create PR\n2. persist PR number\n3. return PR\n\n### `resolve_task_pull_request(...)`\nRecovery-aware PR resolution:\n\n1. if `task.github_pr_number` exists, try `_load_persisted_pull_request()`\n2. if the persisted PR still matches task linkage, reuse it\n3. otherwise reconcile by branch pair and persist\n\n### `_load_persisted_pull_request(...)`\nFetches the stored PR number and validates linkage with `_matches_task_linkage()`.\n\nIf the PR is missing, returns `None`.\n\nIf the PR exists but points to a different branch pair than expected, raises `GitHubAPIError`. This is a strong consistency check: persisted linkage must not silently drift.\n\n---\n\n## Publication checkpoints\n\n### `persist_publication_checkpoint(...)`\nUpdates the task row with:\n\n- `publication_status`\n- `publication_error`\n\nThis is called throughout `publish_task_to_github()` after meaningful side effects.\n\nCheckpointing is what makes publication recovery possible from higher-level task services.\n\n---\n\n## Review gating and merge behavior\n\n### `_determine_review_gate(...)`\nDetermines what publication should do after PR creation.\n\nFlow:\n\n1. require `repo` to be configured, otherwise raise `GitHubAPIError`\n2. 
refresh the current PR with `get_pull_request()`\n3. if PR state is already `\"merged\"`, return merged decision\n4. fetch reviews with `list_pr_reviews()`\n5. find a blocking human review with `find_blocking_human_review()`\n6. if blocking review exists, return `\"blocked\"`\n7. if `publication_config.auto_merge` is enabled, return `\"merged\"` with `should_merge=True`\n8. otherwise return `\"awaiting_human_review\"` with `should_post_ready_comment=True`\n\n### `find_blocking_human_review(reviews)`\nImplements “latest review per human author wins” semantics:\n\n- reviews are sorted by `(submitted_at is None, submitted_at, id)`\n- bot authors are skipped via `_is_bot_author()`\n- only the latest review from each author is retained\n- if any retained review has `state == \"CHANGES_REQUESTED\"`, the newest such review is returned\n\nThis avoids stale change requests from an author continuing to block after they later approve or comment.\n\n### Merge execution\nIf `_determine_review_gate()` returns `should_merge=True`, `publish_task_to_github()` calls:\n\n```python\ngithub_client.merge_pull_request(task.github_repo, pull_request.number, method=config.merge_method)\n```\n\nOn success, publication proceeds as `\"merged\"`.\n\n---\n\n## Comment publication during publication flow\n\n### `_publish_task_comments(...)`\nPublishes task-scoped comments after review gating.\n\nIt may post to two different GitHub objects:\n\n- the linked issue (`task.github_issue_number`) for plan/status updates\n- the PR for ready-for-review comments or fallback status updates\n\nBehavior:\n\n1. if the task is linked to an issue, upsert a `\"plan_summary\"` comment there\n2. upsert a `\"status_update\"` comment using `upsert_status_update_comment()`\n3. if `post_ready_comment` is true, upsert a `\"pr_ready\"` comment on the PR\n\n### `upsert_status_update_comment(...)`\nChooses the target number with `_resolve_status_update_target_number()`:\n\npriority order:\n\n1. linked issue number\n2. 
explicit `github_pr_number` argument\n3. persisted `task.github_pr_number`\n\nIf no target exists, it does nothing.\n\n### `_upsert_logged_task_comment(...)`\nWraps `upsert_task_comment()` with structured logging context:\n\n- `loop_stage=\"publication\"`\n- `branch_name`\n- optional `github_pr_number`\n- `issue_or_pr_number`\n- `comment_type`\n\nIt logs start, failure, and success around the actual upsert.\n\n### Comment body builders\n\n- `build_pre_publication_status_comment(task, milestone, detail=None)`\n- `_build_plan_summary_comment(executor, task)`\n- `_build_status_update_comment(task, pull_request, publication_status=...)`\n- `_build_ready_for_review_comment(task, pull_request)`\n\nThese all eventually produce markdown content passed into `api/github/comments.py`, which adds the marker/title/footer wrapper.\n\n---\n\n## Pull request body generation\n\n### `_build_pull_request_body(...)`\nBuilds the PR body from:\n\n- `task.description`\n- a link to `.darkfactory/plans/{task.task_id}.md`\n- approved plan summary from artifacts or plan path\n- diff summary between base and task branch\n\nIt loads artifacts with `list_artifacts_for_task()` and uses `_build_approved_plan_summary()` to prefer the latest `\"plan\"` result artifact.\n\n### `_build_approved_plan_summary(task, artifacts)`\nResolution order:\n\n1. latest plan artifact content, if non-empty\n2. `task.plan_path`, if present\n3. 
fallback generic approval message\n\n### `_build_pull_request_title(task)`\nUses `task.description.strip()`.\n\n---\n\n## Logging\n\nPublication code consistently logs through:\n\n- `_log_publication_info()`\n- `_log_publication_warning()`\n\nBoth attach structured context from `_build_publication_log_context()`.\n\nCommon fields include:\n\n- `loop_stage=\"publication\"`\n- `branch_name`\n- `publication_status`\n- `github_pr_number`\n- `issue_or_pr_number`\n- `comment_type`\n\nThis is important when tracing recovery flows initiated from task routes/services.\n\n---\n\n# How the pieces connect\n\n## Prompt-time GitHub usage\n\nWhen task execution needs GitHub context:\n\n1. higher-level task code calls `load_task_github_context()`\n2. issue and/or PR context is fetched and summarized\n3. `format_github_context_for_prompt()` renders compact text\n4. prompt consumers receive human-focused GitHub state without DarkFactory comment noise\n\n## Publication-time GitHub usage\n\nWhen a task branch is ready to publish:\n\n1. higher-level task recovery/publication service calls `publish_task_to_github()`\n2. branch is pushed via `GitClient`\n3. PR is reused or created via `GitHubClient`\n4. PR number and publication checkpoints are persisted via task repository functions\n5. review state is evaluated from current GitHub reviews\n6. 
comments are reconciled via marker-based upserts\n\n## Shared conventions\n\nA few conventions are reused across the module:\n\n- **Bot filtering**: both context and review gating distinguish human vs bot activity\n- **Idempotency**:\n  - PRs by branch pair / persisted PR number\n  - comments by embedded marker\n- **Recovery safety**:\n  - publication checkpoints persisted after side effects\n  - persisted PR linkage validated before reuse\n- **External text sanitization**:\n  - prompt-facing context never uses raw GitHub text directly\n\n---\n\n# Key implementation details and contributor notes\n\n## Issue comments vs PR comments\n\n`upsert_task_comment()` uses `list_issue_comments()` / `post_comment()` for both issues and PRs. This is intentional because GitHub PR conversation comments live on the issues API surface.\n\nDo not switch this to review comments unless the product requirement changes.\n\n## Persist PR number immediately after reconciliation\n\n`persist_pull_request_number()` is called right after PR reconciliation. This is critical for restart safety. If publication crashes after PR creation but before later steps, recovery can resume from the stored PR number.\n\n## Persisted PR linkage is strict\n\n`_load_persisted_pull_request()` does not silently accept a mismatched PR. If the stored PR number points to a different branch pair, it raises `GitHubAPIError`.\n\nThat protects task-to-PR integrity and should remain strict.\n\n## Review gating is based on latest review per author\n\n`find_blocking_human_review()` intentionally collapses multiple reviews from the same human author to their latest one. This matches GitHub review workflows better than treating every historical `CHANGES_REQUESTED` as permanently blocking.\n\n## Context extraction is heuristic, not semantic\n\nFunctions like `_looks_like_question_or_requirement()` and `_extract_candidate_sentences()` are lightweight heuristics. 
They are designed for compact prompt context, not perfect NLP classification.\n\nIf you change them, preserve:\n\n- deterministic behavior,\n- bounded output sizes,\n- and resistance to noisy external text.\n\n## There are two diff-summary implementations\n\nBoth `api/github/context.py` and `api/github/publisher.py` contain a private `_build_diff_summary()` with similar logic but different output caps/constants.\n\nThey serve different consumers:\n\n- prompt context\n- PR body generation\n\nIf you refactor them into shared code, preserve current behavior and output expectations.\n\n---\n\n# Function reference by concern\n\n## Comment protocol\n- `render_comment_marker()`\n- `parse_comment_marker()`\n- `format_task_comment()`\n- `find_matching_task_comment()`\n- `upsert_task_comment()`\n\n## Context loading\n- `load_task_github_context()`\n- `build_issue_context()`\n- `build_pr_context()`\n- `format_github_context_for_prompt()`\n\n## Publication orchestration\n- `publish_task_to_github()`\n- `resolve_task_pull_request()`\n- `reconcile_pull_request()`\n- `reconcile_and_persist_pull_request()`\n- `persist_pull_request_number()`\n- `persist_publication_checkpoint()`\n- `upsert_status_update_comment()`\n- `build_pre_publication_status_comment()`\n- `find_blocking_human_review()`\n\n## GitHub transport\n- `AsyncGitHubClient`\n- `GitHubClient`\n- `assert_authoritative_repo()`\n\n---\n\n# Typical publication lifecycle\n\nA normal successful run with default config looks like this:\n\n1. `publish_task_to_github()` starts\n2. `_push_task_branch_to_github()` pushes `refs/heads/{task.branch_name}`\n3. `resolve_task_pull_request()` reuses or creates the PR\n4. `persist_pull_request_number()` stores the PR number\n5. `persist_publication_checkpoint(..., \"pr_open\")`\n6. `_determine_review_gate()` checks current PR state and reviews\n7. with `auto_merge=False`, result is usually `\"awaiting_human_review\"`\n8. `persist_publication_checkpoint(..., \"awaiting_human_review\")`\n9. 
`_publish_task_comments()` upserts:\n   - `\"plan_summary\"` on the issue, if linked\n   - `\"status_update\"` on issue or PR\n   - `\"pr_ready\"` on the PR\n10. `PublicationResult(publication_status=\"awaiting_human_review\", github_pr_number=...)`\n\nIf `auto_merge=True` and there are no blocking human reviews, step 7 becomes merge execution and final status `\"merged\"`.\n\nIf a human’s latest review is `CHANGES_REQUESTED`, final status becomes `\"blocked\"` and no ready-for-review comment is posted.\n\n---\n\n# Testing implications\n\nThis module is designed to be testable because higher-level code depends on:\n\n- the `GitHubClient` protocol rather than `AsyncGitHubClient` directly,\n- pure helpers for formatting and matching,\n- explicit persistence functions for PR numbers and publication checkpoints.\n\nWhen adding behavior, prefer preserving this separation:\n\n- transport concerns in `lib/github/client.py`\n- formatting/reconciliation helpers in `api/github/comments.py`\n- prompt summarization in `api/github/context.py`\n- orchestration and persistence in `api/github/publisher.py`\n\nThat separation is what keeps publication recovery and GitHub integration understandable.","other-agents-md":"# Other — AGENTS.md\n\n# AGENTS.md\n\n`AGENTS.md` is a repository-level contributor guide and architecture snapshot. It is not executable code; instead, it defines how developers and coding agents should understand the project layout, where responsibilities belong, and how to use GitNexus safely when navigating or modifying the codebase.\n\nThis file serves two roles:\n\n- a **high-level map** of the DarkFactory codebase\n- an **operational policy** for code intelligence workflows, especially before editing symbols or committing changes\n\nBecause it is documentation rather than runtime logic, it has no call graph, no incoming or outgoing calls, and no execution flows of its own. 
Its importance is architectural and procedural.\n\n## Purpose\n\n`AGENTS.md` helps contributors answer three questions quickly:\n\n1. **Where should code live?**\n2. **What parts of the system exist now vs. later phases?**\n3. **What analysis steps are required before making changes?**\n\nIt is especially useful for:\n\n- onboarding to the repository\n- locating the correct module for a feature or fix\n- avoiding architectural drift\n- using GitNexus in a disciplined way during debugging and refactoring\n\n## Project Structure Snapshot\n\nThe file describes the intended top-level layout of the repository across implementation phases.\n\n### `api/`\n\nThe `api/` tree contains server-side application logic and most of the domain-specific runtime behavior.\n\nKey subareas:\n\n- `routes/`  \n  FastAPI route handlers. These should remain thin and delegate business logic elsewhere.\n\n- `services/`  \n  Higher-level orchestration such as graph invocation, cron-like behavior, and heartbeat processing.\n\n- `graphs/`  \n  LangGraph `StateGraph` definitions for workflows like planning and implementation.\n\n- `nodes/`  \n  Individual graph node functions such as planner, reviewer, and coder steps.\n\n- `tools/`  \n  Runtime tool boundary, including registry/adapters, permission enforcement, and custom git tooling.\n\n- `agents/`  \n  Agent creation and invocation helpers, including `AgentRegistry`, `create_agent`, and `ReadOnlyBackend`.\n\n- `models/`  \n  Domain models owned by the API layer. 
The snapshot calls out specific files:\n  - `tasks.py`\n  - `review.py`\n  - `feedback.py`\n  - `agents.py`\n  - `triage.py`\n  - `streaming.py`\n\n- `repositories/`  \n  Database access for persisted entities such as tasks, artifacts, feedback items, and events.\n\n- Additional planned areas include:\n  - `feedback/`\n  - `review/`\n  - `termination/`\n  - `github/`\n  - `skills/`\n  - `memory/`\n  - `coding/`\n  - `triage/`\n  - `streaming/`\n  - `queue/`\n  - `config/`\n\n### `lib/`\n\n`lib/` is reserved for reusable infrastructure code that should stay independent of:\n\n- database access\n- FastAPI\n- LangGraph\n\nThe snapshot identifies:\n\n- `lib/git/`\n- `lib/github/client.py`\n- `lib/github/models.py`\n- `lib/config.py`\n\nThis separation is important: code in `lib/` should remain portable and lower-level than `api/`.\n\n### `workspace/`\n\n`workspace/` is described as a runtime extension area that is:\n\n- always writable\n- never rebuilt\n\nIt contains mutable project assets such as:\n\n- prompt templates in `workspace/prompts/`\n- skill definitions in `workspace/skills/`\n- memory/history documents in `workspace/memory/`\n- YAML configs in `workspace/config/`\n- heartbeat definitions in `workspace/HEARTBEAT.md`\n\nThis distinction matters when deciding whether something belongs in source-controlled application code versus runtime-editable content.\n\n## Architectural Boundaries\n\nThe most important architectural guidance in `AGENTS.md` is not the directory list itself, but the implied boundaries between layers.\n\n### Thin transport layer\n\n`api/routes/` should dispatch requests and avoid embedding business logic. 
If route handlers become orchestration-heavy, that logic likely belongs in `services/`, `graphs/`, or `nodes/`.\n\n### Graph-oriented workflow design\n\nThe snapshot separates:\n\n- workflow definitions in `api/graphs/`\n- executable step logic in `api/nodes/`\n\nThat split suggests a pattern where graph topology and state transitions are defined independently from the implementation of each node.\n\n### Tool and agent isolation\n\nThe file distinguishes:\n\n- `api/tools/` for runtime tool boundaries and permissions\n- `api/agents/` for agent construction and invocation\n\nThis helps keep tool execution concerns separate from agent lifecycle/configuration concerns.\n\n### Infrastructure vs. application code\n\n`lib/` is explicitly constrained to avoid DB, FastAPI, and LangGraph dependencies. That makes it the right place for reusable clients and configuration primitives, while `api/` owns application-specific orchestration.\n\n### Mutable runtime content\n\n`workspace/` is intentionally treated differently from source modules. 
Prompts, skills, memory, and YAML configs are expected to evolve at runtime and should not be modeled like static Python packages.\n\n## MVP Scope\n\nThe file explicitly identifies the minimum viable implementation from “doc 01”.\n\nThe MVP includes:\n\n- `routes/`\n- `graphs/`\n- `nodes/`\n- `models/`\n- `repositories/`\n- `agents/`\n  - factory\n  - `ReadOnlyBackend`\n  - invoke helpers\n- `tools/git.py`\n  - custom git tool\n\nThis is useful when comparing the documented architecture to the actual repository state:\n\n- if a planned directory is missing, that may be expected\n- if a module exists outside the MVP, it may belong to a later phase\n- if code appears in the wrong layer, it may indicate drift from the intended design\n\n## GitNexus Guidance\n\nThe second half of `AGENTS.md` defines repository workflow rules for GitNexus-based code intelligence.\n\nThese are process requirements, not suggestions.\n\n### Required before editing\n\nBefore modifying any function, class, or method, contributors must run:\n\n- `gitnexus_impact({target: \"symbolName\", direction: \"upstream\"})`\n\nThe purpose is to identify:\n\n- direct callers\n- affected processes\n- overall risk level\n\nIf the result is `HIGH` or `CRITICAL`, the contributor must warn the user before proceeding.\n\n### Required before committing\n\nBefore committing changes, contributors must run:\n\n- `gitnexus_detect_changes()`\n\nThis verifies that the modified scope matches expectations and helps catch accidental edits or broader-than-intended impact.\n\n### Required for unfamiliar code\n\nWhen exploring behavior by concept rather than symbol name, the file recommends:\n\n- `gitnexus_query({query: \"concept\"})`\n\nThis is preferred over blind grep because it returns process-grouped results and execution-flow context.\n\n### Required for symbol-level understanding\n\nFor a full view of a symbol’s role, use:\n\n- `gitnexus_context({name: \"symbolName\"})`\n\nThis is the primary way to inspect callers, 
callees, and process participation before refactoring or debugging.\n\n### Required for renames\n\nRenames must use:\n\n- `gitnexus_rename({symbol_name: \"old\", new_name: \"new\", dry_run: true})`\n\nThen, after reviewing the preview:\n\n- rerun with `dry_run: false`\n\nThe file explicitly forbids find-and-replace renames because they ignore graph-aware references.\n\n## Debugging Workflow\n\n`AGENTS.md` provides a concrete debugging sequence:\n\n1. `gitnexus_query({query: \"<error or symptom>\"})`\n2. `gitnexus_context({name: \"<suspect function>\"})`\n3. read `gitnexus://repo/darkfactory/process/{processName}`\n4. for regressions, run `gitnexus_detect_changes({scope: \"compare\", base_ref: \"main\"})`\n\nThis sequence encourages moving from symptom → suspect symbol → full process trace → regression comparison.\n\n## Refactoring Workflow\n\nFor refactors, the file emphasizes graph-aware analysis:\n\n- inspect symbol context first with `gitnexus_context`\n- assess upstream blast radius with `gitnexus_impact`\n- use `gitnexus_rename` for renames\n- verify final scope with `gitnexus_detect_changes({scope: \"all\"})`\n\nThis is especially relevant in a codebase with many interconnected workflows and graph-driven execution paths.\n\n## Risk Model\n\nThe documented impact levels are depth-based:\n\n- `d=1` — direct callers/importers; these will break if not updated\n- `d=2` — indirect dependencies; likely affected\n- `d=3` — transitive dependencies; may need testing\n\nThis gives contributors a practical way to prioritize updates and validation after a change.\n\n## Repository Resources\n\nThe file points to GitNexus resources that complement the source tree:\n\n- `gitnexus://repo/darkfactory/context`\n- `gitnexus://repo/darkfactory/clusters`\n- `gitnexus://repo/darkfactory/processes`\n- `gitnexus://repo/darkfactory/process/{name}`\n\nThese are useful when the filesystem alone does not reveal how runtime behavior is composed.\n\n## Index Freshness\n\nA notable 
operational detail is that the GitNexus index becomes stale after commits. The documented refresh command is:\n\n```bash\nnpx gitnexus analyze\n```\n\nIf embeddings already exist and should be preserved:\n\n```bash\nnpx gitnexus analyze --embeddings\n```\n\nThe file also notes that `.gitnexus/meta.json` can be inspected to determine whether embeddings are present.\n\nThis matters because stale indexing can make impact analysis and process tracing inaccurate.\n\n## How This File Connects to the Rest of the Codebase\n\nAlthough `AGENTS.md` is not imported anywhere, it influences how contributors interact with nearly every major area of the repository.\n\n```mermaid\nflowchart TD\n    A[AGENTS.md] --> B[api/]\n    A --> C[lib/]\n    A --> D[workspace/]\n    A --> E[GitNexus workflows]\n    B --> F[routes graphs nodes repositories]\n    C --> G[shared clients/config]\n    D --> H[prompts skills memory config]\n    E --> I[impact context query detect_changes]\n```\n\nIn practice, it acts as:\n\n- a **placement guide** for new code\n- a **boundary document** for architectural decisions\n- a **safety checklist** for edits, debugging, and refactoring\n\n## Contribution Guidance\n\nWhen using this file during development, treat it as a starting point rather than a guarantee of current filesystem state.\n\n### Use it to decide where code belongs\n\nExamples:\n\n- request handling logic belongs in `api/routes/` only if it is transport-specific\n- workflow orchestration belongs in `api/graphs/` or `api/services/`\n- reusable clients belong in `lib/`\n- editable prompts/configs belong in `workspace/`\n\n### Use it to validate architectural intent\n\nIf you find:\n\n- DB access in `lib/`\n- business logic in `routes/`\n- graph topology embedded directly in node implementations\n\nthat likely conflicts with the intended structure documented here.\n\n### Use it with the actual repository state\n\nThe snapshot explicitly warns that the codebase may have diverged. 
Always confirm against the filesystem before assuming a directory or file exists.\n\n## Maintenance Notes\n\n`AGENTS.md` should be updated when any of the following change:\n\n- top-level module ownership or boundaries\n- planned vs. implemented phase scope\n- GitNexus workflow requirements\n- repository resources or CLI guidance\n- location of key domain modules such as `api/models/*` or `lib/github/*`\n\nBecause this file shapes contributor behavior, stale guidance can cause:\n\n- code to be added in the wrong layer\n- unsafe refactors\n- incomplete impact analysis\n- confusion about what is implemented versus planned\n\n## Summary\n\n`AGENTS.md` is the repository’s architectural and operational handbook. It documents:\n\n- the intended structure of `api/`, `lib/`, and `workspace/`\n- the MVP subset versus later-phase modules\n- the expected separation of concerns across layers\n- mandatory GitNexus workflows for editing, debugging, refactoring, and committing\n\nFor contributors, this file is best read before making structural changes, introducing new modules, or touching unfamiliar symbols.","other-alembic-ini":"# Other — alembic.ini\n\n# `alembic.ini`\n\nConfiguration file for Alembic, the database schema migration tool used alongside SQLAlchemy.\n\nThis module is not Python code; it is an INI-style configuration file that Alembic reads when running commands such as:\n\n- `alembic upgrade head`\n- `alembic downgrade`\n- `alembic revision --autogenerate`\n- `alembic current`\n- `alembic history`\n\nIt defines:\n\n- where Alembic migration scripts live\n- how Python import paths are prepared before Alembic loads project code\n- how the database URL is supplied\n- how Alembic and SQLAlchemy logging behaves during migration operations\n\n## Purpose\n\n`alembic.ini` provides the default runtime configuration for the migration environment.\n\nIn this project, it establishes a minimal Alembic setup:\n\n- migration scripts are expected under the `alembic/` 
directory\n- the project root is added to `sys.path`\n- the database URL is intentionally left blank in the file\n- logging is configured for Alembic and SQLAlchemy output\n\nThis file is typically consumed together with Alembic’s `env.py` and migration scripts under the configured `script_location`.\n\n## Configuration Overview\n\n```ini\n[alembic]\nscript_location = alembic\nprepend_sys_path = .\npath_separator = os\nsqlalchemy.url =\n```\n\n### `script_location = alembic`\n\nTells Alembic where to find its migration environment and revision scripts.\n\nExpected contents of that directory usually include:\n\n- `env.py`\n- `script.py.mako`\n- `versions/`\n\nWhen Alembic runs, it uses this path to load the migration environment and locate revision files.\n\n### `prepend_sys_path = .`\n\nAdds the current project root to Python’s import path before Alembic loads migration code.\n\nThis is important when `alembic/env.py` imports application modules directly, for example:\n\n- SQLAlchemy models\n- metadata definitions\n- settings/configuration helpers\n- engine/session factories\n\nWithout this, imports from the repository root may fail depending on how Alembic is invoked.\n\n### `path_separator = os`\n\nInstructs Alembic to split multi-path configuration values (such as `version_locations` and `prepend_sys_path`) on the operating system’s native path-list separator, `os.pathsep`.\n\nThis matters primarily for cross-platform compatibility: the separator is `:` on Unix-like systems and `;` on Windows, and this setting avoids hardcoding one style.\n\n### `sqlalchemy.url =`\n\nThe database URL is intentionally empty.\n\nThis usually means the project does **not** want credentials or environment-specific connection details committed into source control. 
Instead, the URL is expected to be supplied elsewhere, commonly by one of these patterns:\n\n- set dynamically in `alembic/env.py`\n- injected via environment variables\n- passed through external configuration loaded by the migration environment\n\nBecause the value is blank here, Alembic commands will only work if the migration environment fills it in before creating an engine.\n\n## Logging Configuration\n\nThe remainder of the file configures Python logging for Alembic runs.\n\n### Logger registration\n\n```ini\n[loggers]\nkeys = root,sqlalchemy,alembic\n```\n\nDefines three loggers:\n\n- `root`\n- `sqlalchemy`\n- `alembic`\n\n### Handler registration\n\n```ini\n[handlers]\nkeys = console\n```\n\nDefines a single handler named `console`.\n\n### Formatter registration\n\n```ini\n[formatters]\nkeys = generic\n```\n\nDefines a single formatter named `generic`.\n\n## Logger Details\n\n### Root logger\n\n```ini\n[logger_root]\nlevel = WARNING\nhandlers = console\n```\n\nThe root logger emits only warnings and above to the console.\n\nEffect:\n\n- suppresses most low-level noise from unrelated libraries\n- still surfaces warnings and errors during migration execution\n\n### SQLAlchemy logger\n\n```ini\n[logger_sqlalchemy]\nlevel = WARNING\nhandlers =\nqualname = sqlalchemy.engine\n```\n\nConfigures the `sqlalchemy.engine` logger at `WARNING`.\n\nNotably, `handlers =` is empty, so it does not attach its own handler directly and instead relies on normal logging propagation behavior.\n\nEffect:\n\n- SQL statement logging is mostly suppressed\n- warnings and engine-related issues can still appear\n\nIf verbose SQL output is needed during debugging, this level is commonly changed to `INFO`.\n\n### Alembic logger\n\n```ini\n[logger_alembic]\nlevel = INFO\nhandlers = console\nqualname = alembic\n```\n\nConfigures Alembic’s own logger to emit informational messages to the console.\n\nEffect:\n\n- migration steps are visible during commands\n- revision application progress is 
easier to follow\n- operational messages appear without enabling noisy SQL logging\n\nThis is a common balance for migration tooling: Alembic is informative, SQLAlchemy stays quiet unless something is wrong.\n\n## Handler and Formatter\n\n### Console handler\n\n```ini\n[handler_console]\nclass = StreamHandler\nargs = (sys.stderr,)\nlevel = NOTSET\nformatter = generic\n```\n\nDefines a standard `logging.StreamHandler` that writes to `sys.stderr`.\n\nKey points:\n\n- output goes to standard error, which is typical for tooling/logging\n- `level = NOTSET` means filtering is primarily controlled by the logger levels\n- uses the `generic` formatter\n\n### Generic formatter\n\n```ini\n[formatter_generic]\nformat = %(levelname)-5.5s [%(name)s] %(message)s\n```\n\nFormats log messages like:\n\n```text\nINFO  [alembic] Running upgrade ...\nWARNI [sqlalchemy.engine] ...\nERROR [alembic] ...\n```\n\nFormat components:\n\n- `%(levelname)-5.5s` — fixed-width log level prefix\n- `%(name)s` — logger name\n- `%(message)s` — log message body\n\nThis keeps migration output compact and readable in CLI environments.\n\n## How It Connects to the Rest of the Codebase\n\nAlthough `alembic.ini` contains no executable logic, it is part of the migration runtime path.\n\nTypical interaction looks like this:\n\n```mermaid\nflowchart TD\n    A[alembic CLI] --> B[alembic.ini]\n    B --> C[script_location: alembic/]\n    C --> D[env.py]\n    D --> E[project imports via prepend_sys_path]\n    D --> F[database URL resolution]\n    C --> G[versions/ migration scripts]\n```\n\n### Integration points\n\n#### 1. Alembic CLI\nAlembic reads this file as its primary configuration source when commands are executed.\n\n#### 2. 
`alembic/env.py`\nThe migration environment usually reads values from this configuration, especially:\n\n- `sqlalchemy.url`\n- logging setup\n- script location context\n\nIf `sqlalchemy.url` is blank here, `env.py` is the likely place where the actual connection URL is injected.\n\n#### 3. Application modules\nBecause `prepend_sys_path = .`, code inside `env.py` can import project modules from the repository root without requiring package installation first.\n\nThis is especially useful when migrations depend on:\n\n- model metadata for autogeneration\n- application settings\n- engine configuration helpers\n\n#### 4. Migration scripts\nThe `script_location` points Alembic to the revision history that defines schema changes over time.\n\n## Operational Expectations\n\nGiven this configuration, contributors should expect:\n\n- Alembic commands to look for migration files under `alembic/`\n- migration output to be visible at `INFO` level\n- SQLAlchemy engine logs to remain mostly quiet\n- the database URL to come from somewhere other than this file\n\n## Common Maintenance Tasks\n\n### Set or inject the database URL\n\nBecause `sqlalchemy.url` is empty, ensure one of the following is true before running migrations:\n\n- `alembic/env.py` sets the URL programmatically\n- an environment-driven config loader populates it\n- the value is overridden at runtime\n\nIf none of these are implemented, migration commands will fail when Alembic tries to connect.\n\n### Adjust logging verbosity\n\nFor more SQL visibility during debugging:\n\n- change `[logger_sqlalchemy] level` from `WARNING` to `INFO`\n\nFor quieter Alembic output:\n\n- change `[logger_alembic] level` from `INFO` to `WARNING`\n\n### Relocate migration scripts\n\nIf the migration directory moves, update:\n\n```ini\nscript_location = <new path>\n```\n\nThis must continue to point to the directory containing `env.py` and `versions/`.\n\n### Update import behavior\n\nIf project imports stop resolving during migration 
runs, review:\n\n```ini\nprepend_sys_path = .\n```\n\nIf the repository layout changes, this may need to point somewhere else.\n\n## Design Notes\n\nThis configuration reflects a conservative, source-control-friendly setup:\n\n- no database credentials embedded in the repository\n- enough logging to understand migration progress\n- minimal SQL noise\n- project-local imports enabled for migration code\n\nThat makes it suitable for teams where environments differ across local development, CI, staging, and production.\n\n## Summary\n\n`alembic.ini` is the central Alembic configuration file for this project. It:\n\n- points Alembic at the `alembic/` migration directory\n- prepends the project root to `sys.path`\n- leaves `sqlalchemy.url` unset so it can be provided dynamically\n- configures concise console logging for Alembic and SQLAlchemy\n\nWhen contributing to migration-related code, this file is one of the first places to check if Alembic cannot find scripts, cannot import project modules, or is too noisy or too quiet during execution.","other-alembic":"# Other — alembic\n\n# `alembic/script.py.mako`\n\nThis module is an Alembic migration script template, defined in `alembic/script.py.mako`. It is not a runtime application module in the usual sense; instead, it is a scaffold used by Alembic to generate new database migration files.\n\nWhen Alembic creates a revision, it renders this template with revision-specific values and writes the result as a Python migration script. 
That generated script is then executed by Alembic during schema upgrades and downgrades.\n\n## Purpose\n\nThe template defines the standard structure of a migration file:\n\n- migration metadata\n- imports needed for schema operations\n- an `upgrade()` function for applying schema changes\n- a `downgrade()` function for reversing them\n\nThis file exists so every generated migration follows the same shape and integrates correctly with Alembic’s migration runner.\n\n## Template Structure\n\nThe file uses Mako templating syntax such as `${...}` to inject values at generation time.\n\n```python\n\"\"\"${message}\"\"\"\n\nrevision = ${repr(up_revision)}\ndown_revision = ${repr(down_revision)}\nbranch_labels = ${repr(branch_labels)}\ndepends_on = ${repr(depends_on)}\n\nfrom alembic import op\nimport sqlalchemy as sa\n${imports if imports else \"\"}\n\n\ndef upgrade() -> None:\n    ${upgrades if upgrades else \"pass\"}\n\n\ndef downgrade() -> None:\n    ${downgrades if downgrades else \"pass\"}\n```\n\n## Key Components\n\n### Module docstring\n\n```python\n\"\"\"${message}\"\"\"\n```\n\nThe generated migration file begins with a docstring containing the revision message. This is typically the human-readable description supplied when creating the migration, such as:\n\n- `\"create users table\"`\n- `\"add index to orders.created_at\"`\n\nThis helps identify the purpose of the migration when browsing revision files.\n\n### Revision identifiers\n\n```python\nrevision = ${repr(up_revision)}\ndown_revision = ${repr(down_revision)}\nbranch_labels = ${repr(branch_labels)}\ndepends_on = ${repr(depends_on)}\n```\n\nThese variables are required by Alembic.\n\n#### `revision`\nThe unique identifier for the generated migration.\n\n#### `down_revision`\nThe parent revision this migration depends on in the migration chain. 
Alembic uses this to determine ordering.\n\n#### `branch_labels`\nOptional labels for branch management in more complex migration histories.\n\n#### `depends_on`\nOptional dependency metadata for revisions that rely on other revisions outside the direct linear chain.\n\nTogether, these values define where the migration sits in the revision graph.\n\n### Standard imports\n\n```python\nfrom alembic import op\nimport sqlalchemy as sa\n```\n\nThese imports are included in every generated migration.\n\n#### `op`\n`op` is Alembic’s operations interface. Migration code typically uses it for schema changes such as:\n\n- creating or dropping tables\n- adding or removing columns\n- creating indexes\n- executing raw SQL\n\n#### `sa`\n`sa` is the conventional alias for SQLAlchemy and is used for schema/type definitions inside migration operations, for example:\n\n- `sa.Column(...)`\n- `sa.Integer()`\n- `sa.String(length=255)`\n\n### Optional extra imports\n\n```python\n${imports if imports else \"\"}\n```\n\nIf Alembic autogeneration or custom migration logic requires additional imports, they are inserted here. If not, nothing is added.\n\nThis keeps generated files minimal while still allowing migrations to reference custom SQLAlchemy types, enums, helper constructs, or dialect-specific objects when needed.\n\n### `upgrade()`\n\n```python\ndef upgrade() -> None:\n    ${upgrades if upgrades else \"pass\"}\n```\n\n`upgrade()` contains the forward migration logic.\n\nAlembic calls this function when applying the revision. 
The body is injected at generation time:\n\n- if autogenerate or custom revision creation produced operations, they are inserted here\n- otherwise the function contains `pass`\n\nTypical contents include calls through `op`, such as table creation, column alteration, or index creation.\n\n### `downgrade()`\n\n```python\ndef downgrade() -> None:\n    ${downgrades if downgrades else \"pass\"}\n```\n\n`downgrade()` contains the reverse migration logic.\n\nAlembic calls this function when rolling back the revision. As with `upgrade()`, the body is generated dynamically and defaults to `pass` when no downgrade operations are provided.\n\nA well-formed migration should keep `upgrade()` and `downgrade()` logically paired so schema changes can be reversed safely when possible.\n\n## How Rendering Works\n\nThis file is a template, not the final migration. The placeholders are replaced when Alembic generates a revision.\n\nConceptually, the flow looks like this:\n\n```mermaid\nflowchart TD\n    A[Developer creates revision] --> B[Alembic loads script.py.mako]\n    B --> C[Template variables are rendered]\n    C --> D[Migration Python file is written]\n    D --> E[\"Alembic executes upgrade() or downgrade()\"]\n```\n\nThe important distinction is:\n\n- `alembic/script.py.mako` defines the shape of migration files\n- generated revision files contain concrete values and executable migration logic\n\n## Relationship to the Rest of the Codebase\n\nThis module connects the application’s schema evolution process to Alembic’s migration system.\n\n### What it depends on\nThe generated migration files rely on:\n\n- `alembic.op` for migration operations\n- `sqlalchemy` for schema/type definitions\n\n### How it is used\nThis template is typically consumed indirectly by Alembic commands such as revision generation. Developers usually do not import this module from application code.\n\n### What it produces\nIt produces migration scripts that become part of the repository’s migration history. 
Those scripts are then executed against the project database during deployment, local development, CI setup, or rollback workflows.\n\n## Expected Generated Output\n\nA rendered migration file will look roughly like this:\n\n```python\n\"\"\"create users table\"\"\"\n\nrevision = \"abc123\"\ndown_revision = \"def456\"\nbranch_labels = None\ndepends_on = None\n\nfrom alembic import op\nimport sqlalchemy as sa\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"users\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.PrimaryKeyConstraint(\"id\"),\n    )\n\n\ndef downgrade() -> None:\n    op.drop_table(\"users\")\n```\n\nThis example illustrates how the template becomes a concrete migration once placeholders are filled.\n\n## Contribution Notes\n\nIf you modify `alembic/script.py.mako`, you are changing the default structure of all newly generated migrations.\n\nKeep in mind:\n\n- existing migration files are unaffected\n- only future revisions will use the updated template\n- changes should preserve Alembic’s expected metadata variables and function names\n- `upgrade()` and `downgrade()` must remain present so Alembic can execute migrations correctly\n\n## Practical Guidelines\n\nWhen maintaining this template:\n\n- keep `revision`, `down_revision`, `branch_labels`, and `depends_on` intact\n- keep `from alembic import op` and `import sqlalchemy as sa` unless the project intentionally changes migration conventions\n- preserve the `upgrade()` and `downgrade()` function signatures\n- ensure optional sections like `${imports if imports else \"\"}` and fallback `pass` statements remain valid Python after rendering\n\nBecause this module is a template rather than a library of callable functions, its correctness is primarily about generating valid, readable, executable migration scripts.","other-claude-md":"# Other — CLAUDE.md\n\n# CLAUDE.md\n\n`CLAUDE.md` is the repository-level operating guide for working in this codebase with GitNexus. 
It is not executable application code and does not participate in runtime execution flows. Instead, it defines the required workflow for code exploration, debugging, refactoring, and safe modification.\n\nThis file is effectively a contributor policy document for AI-assisted and tool-assisted development in the `darkfactory` repository.\n\n## Purpose\n\nThe module exists to enforce a graph-aware development workflow:\n\n- understand code through GitNexus instead of ad hoc text search\n- assess blast radius before changing any symbol\n- validate the scope of changes before committing\n- use graph-aware rename/refactor tooling instead of manual search-and-replace\n- keep the GitNexus index current after code changes\n\nBecause this file is procedural guidance rather than source code, there are:\n\n- no functions\n- no classes\n- no internal call graph\n- no execution flows tied to runtime behavior\n\nIts “integration” with the rest of the codebase is operational: it tells contributors how to inspect and safely modify the actual code modules.\n\n## What This File Controls\n\nThe document is organized around a few core responsibilities.\n\n### 1. Repository context\n\nThe opening section identifies the repository as indexed by GitNexus under the name `darkfactory`, with summary metrics:\n\n- 2834 symbols\n- 8200 relationships\n- 244 execution flows\n\nThis establishes that the repository has a maintained code intelligence graph and that contributors should rely on it when making changes.\n\nIt also includes an important freshness rule:\n\n```bash\nnpx gitnexus analyze\n```\n\nIf GitNexus reports a stale index, contributors are expected to regenerate it before trusting graph results.\n\n### 2. 
Mandatory pre-edit safety checks\n\nThe strongest requirement in the file is:\n\n- before editing any function, class, or method, run `gitnexus_impact({target: \"symbolName\", direction: \"upstream\"})`\n\nThis makes impact analysis the required first step for symbol-level changes. The expected outcome is not just running the command, but reporting:\n\n- direct callers\n- affected processes\n- risk level\n\nThe file also requires warning the user before proceeding if the result is `HIGH` or `CRITICAL`.\n\nThis is the main safeguard against making local edits that break upstream callers or critical execution paths.\n\n### 3. Mandatory pre-commit verification\n\nBefore committing, contributors must run:\n\n```text\ngitnexus_detect_changes()\n```\n\nThis is used to confirm that the actual modified symbols and execution flows match the intended scope of work.\n\nThe policy here is explicit: do not commit until the detected changes are understood and expected.\n\n### 4. Preferred exploration workflow\n\nFor unfamiliar code, the file directs contributors to use:\n\n```text\ngitnexus_query({query: \"concept\"})\n```\n\ninstead of grepping through the repository.\n\nThe reason is that `gitnexus_query` returns process-grouped, relevance-ranked results, which is more useful for understanding behavior than raw text matches.\n\nWhen deeper symbol-level understanding is needed, the file points to:\n\n```text\ngitnexus_context({name: \"symbolName\"})\n```\n\nThis is the recommended way to inspect:\n\n- callers\n- callees\n- execution flow participation\n\n### 5. Debugging workflow\n\nThe debugging section defines a concrete sequence:\n\n1. `gitnexus_query({query: \"<error or symptom>\"})`\n2. `gitnexus_context({name: \"<suspect function>\"})`\n3. `READ gitnexus://repo/darkfactory/process/{processName}`\n4. 
optionally compare branch changes with `gitnexus_detect_changes({scope: \"compare\", base_ref: \"main\"})`\n\nThis workflow moves from symptom discovery to symbol inspection to full process tracing.\n\n### 6. Refactoring workflow\n\nThe refactoring section adds stricter rules for structural changes.\n\nFor renames:\n\n```text\ngitnexus_rename({symbol_name: \"old\", new_name: \"new\", dry_run: true})\n```\n\nmust be run first, and only after reviewing the preview should the real rename be executed with `dry_run: false`.\n\nFor extraction or splitting:\n\n- first inspect the symbol with `gitnexus_context({name: \"target\"})`\n- then assess upstream impact with `gitnexus_impact({target: \"target\", direction: \"upstream\"})`\n\nAfter any refactor, contributors must run:\n\n```text\ngitnexus_detect_changes({scope: \"all\"})\n```\n\nThis ensures the refactor did not unintentionally affect unrelated files or flows.\n\n## Key Sections\n\n## Always Do\n\nThis section defines non-optional workflow rules. The most important ones are:\n\n- always run `gitnexus_impact` before editing a symbol\n- always run `gitnexus_detect_changes()` before committing\n- always warn on `HIGH` or `CRITICAL` impact\n- prefer `gitnexus_query` for exploration\n- use `gitnexus_context` for full symbol context\n\nThese are the baseline expectations for any code modification task.\n\n## When Debugging\n\nThis section provides a symptom-to-flow investigation path. It is useful when the contributor does not yet know which symbol is responsible.\n\nThe progression is:\n\n- search by symptom\n- inspect likely symbols\n- read the full process trace\n- compare branch changes for regressions\n\n## When Refactoring\n\nThis section is specifically about safe structural edits. 
It distinguishes between:\n\n- renaming\n- extracting/splitting\n- post-refactor verification\n\nThe emphasis is on graph-aware operations rather than text-based edits.\n\n## Never Do\n\nThis section is the inverse of the required workflow and is useful because it makes failure modes explicit:\n\n- never edit without `gitnexus_impact`\n- never ignore `HIGH` or `CRITICAL` warnings\n- never rename with find-and-replace\n- never commit without `gitnexus_detect_changes()`\n\nThese prohibitions are important because they define what counts as unsafe contribution behavior in this repository.\n\n## Tools Quick Reference\n\nThis table is the fastest summary of the supported GitNexus commands and their intended use:\n\n- `gitnexus_query`\n- `gitnexus_context`\n- `gitnexus_impact`\n- `gitnexus_detect_changes`\n- `gitnexus_rename`\n- `gitnexus_cypher`\n\nThis section is especially useful for contributors who already understand the workflow and just need command reminders.\n\n## Impact Risk Levels\n\nThe risk table explains how to interpret impact depth:\n\n- `d=1`: direct breakage risk\n- `d=2`: likely affected indirect dependencies\n- `d=3`: transitive testing concern\n\nThis section matters because the rest of the document assumes contributors can interpret impact output and act accordingly.\n\n## Resources\n\nThe resource URIs connect the workflow to GitNexus repository views:\n\n- `gitnexus://repo/darkfactory/context`\n- `gitnexus://repo/darkfactory/clusters`\n- `gitnexus://repo/darkfactory/processes`\n- `gitnexus://repo/darkfactory/process/{name}`\n\nThese are the main entry points for navigating architecture and execution traces.\n\n## Self-Check Before Finishing\n\nThis is the final validation checklist for any code modification task:\n\n1. impact analysis was run for all modified symbols\n2. no `HIGH`/`CRITICAL` warnings were ignored\n3. `gitnexus_detect_changes()` matches expected scope\n4. 
all `d=1` dependents were updated\n\nThis section acts as the repository’s definition of “done” for safe edits.\n\n## Keeping the Index Fresh\n\nAfter commits, the GitNexus index is considered stale and should be regenerated.\n\nTwo commands are documented:\n\n```bash\nnpx gitnexus analyze\n```\n\nand, when embeddings already exist:\n\n```bash\nnpx gitnexus analyze --embeddings\n```\n\nThe file also documents how to inspect `.gitnexus/meta.json` and warns that running analyze without `--embeddings` will remove previously generated embeddings.\n\nThat warning is operationally important for teams relying on semantic search or embedding-backed features.\n\n## CLI Skill References\n\nThe final section maps common tasks to skill files under `.claude/skills/gitnexus/...`, including:\n\n- architecture exploration\n- impact analysis\n- debugging\n- refactoring\n- guide/reference\n- CLI usage\n\nThis makes `CLAUDE.md` the top-level routing document, while the skill files hold deeper task-specific instructions.\n\n## How It Connects to the Rest of the Codebase\n\nAlthough `CLAUDE.md` has no runtime dependencies, it is tightly connected to the repository’s development process.\n\nIt governs how contributors should interact with:\n\n- symbols in application code\n- execution flows in GitNexus\n- repository-wide architecture views\n- refactoring operations\n- pre-commit validation\n\nIn practice, this file sits above the codebase as a workflow contract:\n\n```mermaid\nflowchart TD\n    A[Contributor task] --> B[gitnexus_query]\n    A --> C[gitnexus_context]\n    A --> D[gitnexus_impact]\n    D --> E[Edit code]\n    E --> F[gitnexus_detect_changes]\n    F --> G[Commit]\n    G --> H[npx gitnexus analyze]\n```\n\nThis is the intended lifecycle for most changes:\n\n- discover relevant code\n- inspect symbol context\n- assess impact\n- edit safely\n- verify scope\n- commit\n- refresh the index\n\n## Practical Usage Patterns\n\n## Exploring unfamiliar code\n\nUse 
`gitnexus_query({query: \"concept\"})` first when you do not know symbol names. This is preferred over grep because it returns execution-flow-aware results.\n\nThen use `gitnexus_context({name: \"symbolName\"})` on the most relevant symbol to understand how it fits into the system.\n\n## Editing an existing function\n\nBefore changing a function, class, or method:\n\n```text\ngitnexus_impact({target: \"symbolName\", direction: \"upstream\"})\n```\n\nReview:\n\n- direct callers\n- affected processes\n- risk level\n\nIf risk is high, communicate that before proceeding.\n\n## Renaming a symbol\n\nDo not use manual search-and-replace. Instead:\n\n```text\ngitnexus_rename({symbol_name: \"old\", new_name: \"new\", dry_run: true})\n```\n\nReview the preview carefully, especially any text-search-based edits, then run the real rename.\n\n## Verifying a branch before commit\n\nRun:\n\n```text\ngitnexus_detect_changes({scope: \"staged\"})\n```\n\nor broader scopes such as:\n\n```text\ngitnexus_detect_changes({scope: \"all\"})\n```\n\nor regression comparison:\n\n```text\ngitnexus_detect_changes({scope: \"compare\", base_ref: \"main\"})\n```\n\nUse the result to confirm that only intended symbols and flows changed.\n\n## Contribution Guidance\n\nWhen updating this file, preserve its role as a strict workflow contract. Changes should be made carefully because they alter contributor expectations across the entire repository.\n\nGood updates typically:\n\n- clarify required tool usage\n- add missing safety checks\n- improve command examples\n- keep terminology consistent with actual GitNexus commands\n\nAvoid changes that:\n\n- weaken mandatory safety requirements\n- introduce ambiguous wording around required steps\n- suggest manual alternatives where graph-aware tooling is required\n- drift from the actual command names documented here\n\n## Summary\n\n`CLAUDE.md` is the repository’s GitNexus workflow policy. 
It does not implement application behavior, but it strongly shapes how application code should be explored, modified, debugged, and refactored.\n\nIts core rules are:\n\n- run `gitnexus_impact` before editing symbols\n- use `gitnexus_query` and `gitnexus_context` to understand code\n- use `gitnexus_rename` for safe renames\n- run `gitnexus_detect_changes()` before committing\n- refresh the GitNexus index after commits\n\nFor contributors, understanding this file is essential to making safe, graph-aware changes anywhere else in the codebase.","other-docker-compose-yml":"# Other — docker-compose.yml\n\n# `docker-compose.yml`\n\nLocal orchestration for the application stack. This file defines the two runtime services needed for development and testing:\n\n- `db`: a PostgreSQL 16 instance\n- `darkfactory`: the application container built from the repository root\n\nIt also declares a named volume, `postgres_data`, to persist database state across container restarts.\n\n## Purpose\n\nThis Compose file provides a reproducible local environment for running the application against PostgreSQL without requiring developers to install or manage those dependencies directly on the host.\n\nIt is responsible for:\n\n- building and starting the application container\n- provisioning a PostgreSQL database with expected credentials and database name\n- wiring the application to the database through container networking\n- mounting local directories into the container for workspace and target-repository access\n- injecting runtime configuration through environment variables and `.env`\n\n## High-level architecture\n\n```mermaid\nflowchart LR\n  Dev[Developer machine]\n  App[darkfactory service]\n  DB[db service<br/>postgres:16-alpine]\n  WS[./workspace]\n  Target[\"${TARGET_REPO_PATH}\"]\n  Vol[postgres_data volume]\n\n  Dev --> App\n  App --> DB\n  WS --> App\n  Target --> App\n  DB --> Vol\n```\n\n## Services\n\n## `db`\n\nPostgreSQL service used by the application.\n\n### 
Configuration\n\n- **Image**: `postgres:16-alpine`\n- **Port mapping**: `5432:5432`\n- **Persistent storage**: `postgres_data:/var/lib/postgresql/data`\n\n### Environment variables\n\nThe container is initialized with:\n\n- `POSTGRES_USER=darkfactory`\n- `POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-darkfactory}`\n- `POSTGRES_DB=darkfactory`\n\nThis means:\n\n- the default database user is `darkfactory`\n- the default password is taken from `POSTGRES_PASSWORD` if set in the shell or environment\n- otherwise it falls back to `darkfactory`\n- the initial database created is `darkfactory`\n\n### Persistence\n\nThe named volume:\n\n```yaml\nvolumes:\n  - postgres_data:/var/lib/postgresql/data\n```\n\nstores PostgreSQL data outside the container lifecycle. Recreating the container does not remove the database unless the volume is explicitly deleted.\n\n## `darkfactory`\n\nApplication service built from the current repository.\n\n### Configuration\n\n- **Build context**: `.`\n- **User**: `\"10001:10001\"`\n- **Port mapping**: `8000:8000`\n- **Dependency**: `depends_on: - db`\n\n### Build and startup behavior\n\n`build: .` tells Compose to build the application image from the Dockerfile in the repository root.\n\n`depends_on` ensures the `db` container is started before `darkfactory`. This does **not** guarantee PostgreSQL is ready to accept connections; it only controls startup order. 
If the application requires the database to be fully ready at boot, that readiness must be handled by the application entrypoint or retry logic.\n\n### Runtime user\n\nThe container runs as UID/GID `10001:10001`:\n\n```yaml\nuser: \"10001:10001\"\n```\n\nThis is typically used to avoid running the application as root and to make file ownership behavior more predictable when writing to mounted volumes.\n\nWhen contributing to this setup, keep in mind:\n\n- mounted host directories must be writable by this UID/GID, or at least compatible with it\n- permission issues in `./workspace` or the mounted target repository may surface if host ownership differs significantly\n\n## Environment configuration\n\nThe application service uses both `env_file` and inline `environment`.\n\n### `.env`\n\n```yaml\nenv_file:\n  - .env\n```\n\nVariables in `.env` are loaded into the container environment. This is useful for local secrets or machine-specific configuration.\n\n### Inline environment variables\n\nThe following variables are explicitly set for `darkfactory`:\n\n- `DATABASE_URL=postgresql://darkfactory:${POSTGRES_PASSWORD:-darkfactory}@db:5432/darkfactory`\n- `TEST_DATABASE_URL=postgresql://darkfactory:${POSTGRES_PASSWORD:-darkfactory}@db:5432/darkfactory`\n- `TEST_DATABASE_ADMIN_URL=postgresql://darkfactory:${POSTGRES_PASSWORD:-darkfactory}@db:5432/postgres`\n- `PYTEST_ADDOPTS=-p no:cacheprovider`\n- `TARGET_REPO=/work/target`\n- `WORKSPACE_ROOT=/app/workspace`\n- `CLONE_ROOT=/work/clones`\n\n### Database connection URLs\n\nThese URLs use the Compose service name `db` as the hostname. Inside the Compose network, `db` resolves to the PostgreSQL container automatically.\n\n#### `DATABASE_URL`\n\nPrimary application database connection:\n\n```text\npostgresql://darkfactory:${POSTGRES_PASSWORD:-darkfactory}@db:5432/darkfactory\n```\n\n#### `TEST_DATABASE_URL`\n\nTest database connection. 
In this configuration it points to the same database name, `darkfactory`.\n\n#### `TEST_DATABASE_ADMIN_URL`\n\nAdministrative connection to the default `postgres` database:\n\n```text\npostgresql://darkfactory:${POSTGRES_PASSWORD:-darkfactory}@db:5432/postgres\n```\n\nThis is commonly used by test setup code that needs to create, drop, or inspect databases.\n\n### Test behavior\n\n`PYTEST_ADDOPTS=-p no:cacheprovider` disables pytest’s cache provider plugin. This avoids creation of `.pytest_cache` and can reduce noise or permission issues in containerized environments.\n\n## Volume mounts\n\nThe `darkfactory` service mounts two host paths:\n\n```yaml\nvolumes:\n  - ./workspace:/app/workspace\n  - ${TARGET_REPO_PATH}:/work/target\n```\n\n### `./workspace:/app/workspace`\n\nMaps the repository-local `workspace` directory into the container at `/app/workspace`.\n\nThis path is also exposed to the application through:\n\n```yaml\nWORKSPACE_ROOT=/app/workspace\n```\n\nUse this for application-managed working files that should remain visible and persistent on the host.\n\n### `${TARGET_REPO_PATH}:/work/target`\n\nMaps an external host path into the container at `/work/target`.\n\nThis path is exposed through:\n\n```yaml\nTARGET_REPO=/work/target\n```\n\n`TARGET_REPO_PATH` must be defined in the shell environment or `.env` before starting Compose. If it is missing, Compose will not be able to resolve the bind mount correctly.\n\nThis mount is likely intended to give the application access to another repository or working tree outside the current project.\n\n### `CLONE_ROOT=/work/clones`\n\nUnlike `WORKSPACE_ROOT` and `TARGET_REPO`, `/work/clones` is configured only as an environment variable and is **not** mounted from the host in this file. 
That means:\n\n- it exists only inside the container filesystem unless created elsewhere in the image\n- any data written there may be ephemeral unless another volume or bind mount is added\n\nIf clone persistence is needed across container rebuilds or restarts, this is a likely extension point.\n\n## Networking\n\nNo explicit networks are declared, so Compose creates a default network for the project.\n\nWithin that network:\n\n- `darkfactory` can connect to PostgreSQL using host `db`\n- host machine access is available through published ports:\n  - PostgreSQL: `localhost:5432`\n  - application: `localhost:8000`\n\n## How it connects to the rest of the codebase\n\nThis file is infrastructure glue rather than application logic. It connects to the rest of the codebase through conventions and environment variables expected by the application:\n\n- `DATABASE_URL`, `TEST_DATABASE_URL`, and `TEST_DATABASE_ADMIN_URL` are consumed by the application and/or test setup for database access\n- `WORKSPACE_ROOT`, `TARGET_REPO`, and `CLONE_ROOT` define filesystem locations the application uses at runtime\n- `PYTEST_ADDOPTS` influences test execution inside the container\n- `build: .` ties this Compose setup to the repository’s Dockerfile and application packaging\n\nIn practice, this file is the entry point for running the codebase in a containerized local environment.\n\n## Common workflow\n\nTypical usage:\n\n```bash\ndocker compose up --build\n```\n\nThis will:\n\n1. build the `darkfactory` image from the current repository\n2. start the `db` container\n3. start the `darkfactory` container\n4. expose the application on port `8000`\n5. 
expose PostgreSQL on port `5432`\n\nTo run in the background:\n\n```bash\ndocker compose up -d --build\n```\n\nTo stop services:\n\n```bash\ndocker compose down\n```\n\nTo stop services and remove the database volume:\n\n```bash\ndocker compose down -v\n```\n\nUse `-v` carefully, since it deletes persisted PostgreSQL data in `postgres_data`.\n\n## Required local inputs\n\nBefore starting the stack, ensure these values are available as needed:\n\n### `POSTGRES_PASSWORD`\n\nOptional, but recommended for anything beyond throwaway local development.\n\nIf omitted, both the database container and application URLs fall back to `darkfactory`.\n\n### `TARGET_REPO_PATH`\n\nRequired for the bind mount:\n\n```yaml\n- ${TARGET_REPO_PATH}:/work/target\n```\n\nThis should point to a valid host directory. Example:\n\n```bash\nexport TARGET_REPO_PATH=/absolute/path/to/target/repo\ndocker compose up --build\n```\n\nA `.env` file can also provide it, depending on how Compose is being invoked.\n\n## Design notes and contribution guidance\n\n### Why service names matter\n\nThe database URLs use `@db:5432`, not `localhost`. Inside Compose, containers communicate over the internal network using service names. If you rename the `db` service, you must also update all connection URLs that reference `db`.\n\n### Why both `DATABASE_URL` and test URLs exist\n\nThe application and test tooling often need separate connection settings. 
Even though `DATABASE_URL` and `TEST_DATABASE_URL` currently point to the same database, keeping them separate allows future divergence without changing application code.\n\n### Why the database port is published\n\nPublishing `5432:5432` allows host tools such as `psql`, GUI database clients, or migration tooling to connect directly to the containerized PostgreSQL instance.\n\nIf host access is unnecessary, this mapping could be removed while preserving container-to-container connectivity.\n\n### Why `depends_on` is limited\n\n`depends_on` controls startup order only. If contributors encounter intermittent startup failures caused by PostgreSQL not being ready, consider one of these improvements:\n\n- add a database healthcheck and gate startup on it\n- add retry logic in the application startup path\n- use a wait-for-database entrypoint script\n\n## Potential extension points\n\nThis Compose file is intentionally minimal. Common enhancements include:\n\n- adding a healthcheck for `db`\n- mounting `/work/clones` to persist cloned repositories\n- separating development and test databases\n- adding profiles for optional services\n- restricting host port exposure when not needed\n- adding explicit restart policies\n\n## Summary\n\n`docker-compose.yml` defines the local container runtime for the project:\n\n- PostgreSQL runs in `db`\n- the application runs in `darkfactory`\n- database state persists in `postgres_data`\n- local workspace and target repository paths are mounted into the app container\n- runtime behavior is configured through `.env` and explicit environment variables\n\nFor contributors, this file is the main place to understand how the application expects its database, filesystem paths, and container runtime to be wired together during local development and testing.","other-dockerfile":"# Other — Dockerfile\n\n# Dockerfile\n\nBuilds the application container image used to run the Python API module as a non-root process on port `8000`.\n\nThis Dockerfile 
packages the repository into a slim Python 3.12-based image, installs both system and Python dependencies, includes the application source tree, and starts the service with:\n\n```bash\npython -m api serve --host 0.0.0.0 --port 8000\n```\n\n## Purpose\n\nThe Dockerfile defines the runtime environment for the project. It is responsible for:\n\n- selecting the base Python runtime\n- installing required OS-level tooling\n- installing Python dependencies from `pyproject.toml`\n- copying the application source into the image\n- preparing writable directories used at runtime\n- dropping privileges to a non-root user\n- defining the default process that launches the API\n\nThis file is the bridge between the repository layout and a deployable container image.\n\n## Build and Runtime Overview\n\n```mermaid\nflowchart TD\n    A[python:3.12-slim] --> B[Install git]\n    B --> C[Install uv and create appuser]\n    C --> D[Create /work/clones and /app/logs]\n    D --> E[Copy project files]\n    E --> F[\"uv pip install --system -e .[dev]\"]\n    F --> G[Switch to appuser]\n    G --> H[python -m api]\n    H --> I[serve --host 0.0.0.0 --port 8000]\n```\n\n## Base Image\n\n```dockerfile\nFROM python:3.12-slim\n```\n\nThe image starts from the official slim Python 3.12 distribution.\n\nWhy this matters:\n\n- `python:3.12-slim` keeps the image relatively small\n- Python is already installed and ready for package installation\n- Debian-based slim images still support `apt-get`, which is used later for system packages\n\n## Working Directory\n\n```dockerfile\nWORKDIR /app\n```\n\nAll subsequent filesystem operations are relative to `/app` unless absolute paths are used.\n\nThis becomes the main application directory inside the container and is where the repository contents are copied.\n\n## System Dependencies\n\n```dockerfile\nRUN apt-get update && apt-get install -y --no-install-recommends git \\\n    && rm -rf /var/lib/apt/lists/*\n```\n\nThis layer installs `git` and then removes 
cached package metadata.\n\n### Why `git` is installed\n\nThe Dockerfile does not directly invoke `git`, but the application or its dependencies may require it at runtime or install time. Common reasons include:\n\n- editable installs that reference VCS metadata\n- dependencies pulled from Git repositories\n- application features that clone repositories into `/work/clones`\n\nThe presence of `/work/clones` strongly suggests repository cloning is part of the broader application workflow.\n\n### Why cleanup is included\n\n```dockerfile\nrm -rf /var/lib/apt/lists/*\n```\n\nThis reduces image size by removing package index files after installation.\n\n## Python Package Tooling and Runtime User\n\n```dockerfile\nRUN pip install uv && useradd -m -u 10001 appuser\n```\n\nThis layer does two things:\n\n- installs `uv`\n- creates a non-root user named `appuser` with UID `10001`\n\n### `uv`\n\n`uv` is used later to install Python dependencies:\n\n```dockerfile\nRUN uv pip install --system -e \".[dev]\"\n```\n\nUsing `uv` instead of plain `pip` typically improves dependency resolution and installation speed.\n\n### `appuser`\n\nRunning the application as a non-root user is a security hardening measure. 
The container switches to this user near the end of the Dockerfile.\n\n## Runtime Directories and Permissions\n\n```dockerfile\nRUN mkdir -p /work/clones /app/logs && chown -R 10001:10001 /work/clones /app/logs\n```\n\nThis prepares two writable directories:\n\n- `/work/clones`\n- `/app/logs`\n\nBoth are owned by UID/GID `10001`, matching `appuser`.\n\n### Expected usage\n\n- `/work/clones`: likely used for checked-out repositories or temporary workspace content\n- `/app/logs`: likely used for application log output\n\nBecause the container later runs as `appuser`, these directories must be writable before the user switch.\n\n## Copied Project Files\n\n```dockerfile\nCOPY pyproject.toml .\nCOPY alembic.ini .\nCOPY alembic/ alembic/\nCOPY api/ api/\nCOPY lib/ lib/\nCOPY tests/ tests/\nCOPY workspace/ workspace/\n```\n\nThese instructions copy the repository contents needed for installation and runtime.\n\n### Included paths\n\n- `pyproject.toml`  \n  Defines Python package metadata and dependencies.\n\n- `alembic.ini`\n- `alembic/`  \n  Database migration configuration and migration scripts.\n\n- `api/`  \n  The application entrypoint module. This is especially important because the container starts with `python -m api`.\n\n- `lib/`  \n  Shared library code used by the application.\n\n- `tests/`  \n  Included in the image because the install target is `\".[dev]\"`, which may rely on test-related extras or support running tests inside the container.\n\n- `workspace/`  \n  Additional project code or assets used by the application.\n\n### Notable implication\n\nThe image is not strictly runtime-minimized. 
It includes development-oriented content and installs the `dev` extras, which suggests this image may be intended for development, CI, or an all-in-one environment rather than a minimal production-only artifact.\n\n## Python Dependency Installation\n\n```dockerfile\nRUN uv pip install --system -e \".[dev]\"\n```\n\nThis installs the project into the system Python environment inside the container.\n\n### Breakdown\n\n- `uv pip install`  \n  Uses `uv`'s pip-compatible installer\n\n- `--system`  \n  Installs into the container’s system Python environment rather than creating a virtual environment\n\n- `-e`  \n  Performs an editable install\n\n- `\".[dev]\"`  \n  Installs the current project along with the `dev` optional dependency group defined in `pyproject.toml`\n\n### Why editable install is used\n\nEditable install means the package is installed from the source tree copied into `/app`. This is useful when:\n\n- the package layout spans multiple directories\n- developers want source changes reflected without rebuilding installation metadata\n- tooling expects the project to be installed as a package while still using the checked-out source tree\n\nIn a container context, this is often chosen for development convenience.\n\n## User Context\n\n```dockerfile\nUSER appuser\n```\n\nFrom this point onward, the container runs as the non-root `appuser`.\n\nThis affects:\n\n- file access permissions\n- process privileges\n- ability to write only to directories that were prepared earlier, such as `/work/clones` and `/app/logs`\n\nIf future changes add runtime write paths, they must also be made writable for UID `10001`.\n\n## Network Port\n\n```dockerfile\nEXPOSE 8000\n```\n\nDocuments that the containerized service listens on port `8000`.\n\n`EXPOSE` does not publish the port by itself; it serves as metadata and a convention for operators and tooling.\n\n## Container Startup\n\n```dockerfile\nENTRYPOINT [\"python\", \"-m\", \"api\"]\nCMD [\"serve\", \"--host\", \"0.0.0.0\", 
\"--port\", \"8000\"]\n```\n\nThese two instructions combine into the default command:\n\n```bash\npython -m api serve --host 0.0.0.0 --port 8000\n```\n\n### How `ENTRYPOINT` and `CMD` interact\n\n- `ENTRYPOINT` defines the executable: `python -m api`\n- `CMD` provides default arguments: `serve --host 0.0.0.0 --port 8000`\n\nThis pattern is useful because callers can override only the arguments while keeping the same Python module entrypoint.\n\nExamples:\n\nRun the default server:\n\n```bash\ndocker run --rm -p 8000:8000 <image>\n```\n\nOverride the command arguments:\n\n```bash\ndocker run --rm <image> some-other-subcommand\n```\n\nThat becomes:\n\n```bash\npython -m api some-other-subcommand\n```\n\nOverride the entire entrypoint if needed:\n\n```bash\ndocker run --rm --entrypoint sh <image>\n```\n\n## Relationship to the Rest of the Codebase\n\nThis Dockerfile is tightly coupled to the repository structure and startup conventions.\n\n### `api/`\n\nThe most important integration point is:\n\n```dockerfile\nENTRYPOINT [\"python\", \"-m\", \"api\"]\n```\n\nThis requires `api` to be a valid Python module/package with module execution support. In practice, that means the `api` package must expose a runnable module entrypoint compatible with the `serve` command.\n\n### `pyproject.toml`\n\nDependency installation depends entirely on `pyproject.toml`:\n\n```dockerfile\nRUN uv pip install --system -e \".[dev]\"\n```\n\nAny changes to package metadata, dependency groups, or build backend behavior directly affect image builds.\n\n### `alembic.ini` and `alembic/`\n\nThese files are copied into the image, indicating database migrations are expected to be available in the container environment. 
If migration commands are run through the application or operational tooling, this image already contains the necessary migration assets.\n\n### `lib/`, `workspace/`, and `tests/`\n\nThese directories are included because they are part of the installed project or expected runtime/development environment. Removing or relocating them would require corresponding updates to packaging and import paths.\n\n## Design Characteristics\n\n### Security-conscious defaults\n\nThe Dockerfile avoids running the application as root:\n\n- creates `appuser`\n- assigns ownership of writable directories\n- switches to `USER appuser`\n\n### Development-friendly packaging\n\nThe image installs:\n\n- editable package mode (`-e`)\n- development extras (`[dev]`)\n- test sources (`tests/`)\n\nThis is convenient for iteration and CI, but may be broader than necessary for production deployment.\n\n### Single-stage build\n\nThis is a single-stage Dockerfile. That keeps it simple, but it also means:\n\n- build-time tools remain in the final image\n- development dependencies remain in the final image\n- image size may be larger than a multi-stage production build\n\n## Common Maintenance Tasks\n\n### Adding a new source directory\n\nIf the project gains a new top-level package or runtime asset directory, it must be copied into the image explicitly with another `COPY` instruction unless packaging pulls it in another way.\n\n### Adding a new writable path\n\nIf application code needs to write somewhere new at runtime, update the Dockerfile to create that path and assign ownership to UID `10001` before `USER appuser`.\n\nExample pattern:\n\n```dockerfile\nRUN mkdir -p /new/path && chown -R 10001:10001 /new/path\n```\n\n### Changing the startup command\n\nIf the CLI exposed by `python -m api` changes, update `CMD` to match the new invocation pattern.\n\n### Reducing image size for production\n\nPotential future refinements:\n\n- switch from `\".[dev]\"` to a production dependency set\n- omit 
`tests/` from the image\n- use a multi-stage build\n- install only runtime OS packages\n\n## Build and Run Examples\n\n### Build\n\n```bash\ndocker build -t app-image .\n```\n\n### Run\n\n```bash\ndocker run --rm -p 8000:8000 app-image\n```\n\n### Run with mounted workspace or persistent directories\n\nIf the application writes to `/work/clones` or `/app/logs`, bind mounts or volumes can be attached:\n\n```bash\ndocker run --rm \\\n  -p 8000:8000 \\\n  -v $(pwd)/logs:/app/logs \\\n  -v $(pwd)/clones:/work/clones \\\n  app-image\n```\n\nEnsure mounted host directories are writable by the container user or compatible with UID `10001`.\n\n## Things to Watch For\n\n### Build cache invalidation\n\nBecause source directories are copied before dependency installation, changes anywhere in the copied tree can invalidate the layer cache for:\n\n```dockerfile\nRUN uv pip install --system -e \".[dev]\"\n```\n\nIf build speed becomes an issue, dependency metadata could be copied earlier and source code later to improve caching.\n\n### Editable install in containers\n\nEditable installs are convenient, but they are less typical for hardened production images. If reproducibility or minimalism becomes more important, consider a non-editable install.\n\n### UID assumptions\n\nThe Dockerfile hardcodes UID `10001`. 
This is usually fine, but can matter when integrating with host-mounted volumes or orchestrators that enforce specific user IDs.\n\n## Summary\n\nThis Dockerfile creates a Python 3.12 container image that:\n\n- installs `git` and `uv`\n- installs the project from `pyproject.toml` with `dev` extras\n- includes application, migration, library, test, and workspace code\n- prepares writable runtime directories\n- runs as non-root `appuser`\n- starts the service with `python -m api serve --host 0.0.0.0 --port 8000`\n\nIt is best understood as the project’s containerized execution environment: simple, development-friendly, and directly aligned with the repository’s Python package layout.","other-memory":"# Other — memory\n\n# Other — memory\n\nThe `workspace/memory` module defines the project’s persistent, human-maintained memory files. It is not an executable code module and contains no functions, classes, or runtime control flow. Instead, it provides a simple convention for storing durable context that can be reused across agent runs or development sessions.\n\n## Purpose\n\nThis module exists to separate two kinds of long-lived context:\n\n- **Operational history**: what happened over time\n- **Persistent facts**: what should continue to be true and available as context\n\nBy keeping these in dedicated Markdown files, the project can preserve useful state without embedding it in source code or relying on transient session memory.\n\n## Files\n\n### `workspace/memory/HISTORY.example.md`\n\nA template for recording chronological notes about prior work.\n\nCurrent contents describe it as:\n\n- a place to append operational notes, summaries, and prior outcomes\n- a chronological log of notable events or milestones\n\nExample entries include:\n\n- initial scaffolding\n- adding tool wrappers\n- running a planning loop\n\nThis file establishes the expected format and intent for a real `HISTORY.md`-style memory file.\n\n### `workspace/memory/MEMORY.example.md`\n\nA template 
for storing persistent facts that should be loaded into agent context.\n\nCurrent contents describe it as:\n\n- durable preferences\n- environment assumptions\n- default project-specific facts\n\nExample entries include:\n\n- coding style preferences\n- default GitHub repository owner\n- deployment environment details\n\nThis file establishes the expected format and intent for a real `MEMORY.md`-style memory file.\n\n## What this module is not\n\nThis module does **not** contain:\n\n- Python modules\n- exported APIs\n- classes\n- functions\n- configuration parsers\n- runtime logic\n\nThe call graph confirms this:\n\n- no internal calls\n- no outgoing calls\n- no incoming calls\n- no detected execution flows\n\nThat means this module should be understood as a **documentation/data convention**, not as executable application behavior.\n\n## Conceptual model\n\nThe two files serve different roles:\n\n```mermaid\nflowchart TD\n    A[workspace/memory] --> B[HISTORY.example.md]\n    A --> C[MEMORY.example.md]\n    B --> D[Chronological notes]\n    C --> E[Persistent facts]\n```\n\n## How it works\n\nAt a practical level, this module works through convention:\n\n1. A developer or agent reads the example files to understand the intended structure.\n2. Real memory files can be created based on those templates.\n3. Those files are updated manually as the project evolves.\n4. Other parts of the system may load or reference this information as contextual input, but that behavior is not implemented in this module itself.\n\nBecause there is no parser or schema enforcement here, the module relies on:\n\n- clear file naming\n- simple Markdown structure\n- disciplined manual updates\n\n## Key design characteristics\n\n### Template-based\n\nBoth files are examples rather than active project memory stores. 
The `.example.md` suffix signals that they are starter documents or reference formats.\n\nThis is useful when:\n\n- bootstrapping a new workspace\n- documenting expected memory structure without committing real project-specific memory\n- providing safe defaults in a repository template\n\n### Human-readable and editable\n\nMarkdown keeps the memory layer easy to inspect and modify without specialized tooling. Contributors can update these files directly during normal development workflows.\n\n### Low coupling\n\nSince there is no code in this module, it has no direct dependency surface. It can be adopted by any surrounding tooling that wants to read persistent context, but it does not impose implementation details on that tooling.\n\n## Recommended usage\n\n### Use `HISTORY` for event-oriented records\n\nGood candidates include:\n\n- completed milestones\n- notable decisions and outcomes\n- summaries of prior runs\n- operational incidents or fixes\n- timeline-oriented notes\n\nEntries should generally be append-only and date-oriented.\n\n### Use `MEMORY` for stable context\n\nGood candidates include:\n\n- coding conventions\n- repository defaults\n- environment assumptions\n- recurring preferences\n- durable project facts that should be remembered across sessions\n\nEntries should be concise and written as facts rather than narratives.\n\n## Contribution guidance\n\nWhen extending or maintaining this module:\n\n### Preserve the separation of concerns\n\nKeep historical logs and persistent facts in separate files.\n\n- Put “what happened” in `HISTORY`\n- Put “what remains true” in `MEMORY`\n\nAvoid mixing these categories, since that makes downstream context loading less predictable.\n\n### Keep entries short and actionable\n\nThese files are most useful when entries are easy to scan. 
Prefer:\n\n- bullet points\n- dated entries for history\n- stable declarative statements for memory\n\n### Treat example files as templates\n\nIf the repository expects active memory files, create concrete versions derived from these examples rather than overloading the examples themselves, unless the project intentionally uses the example files directly.\n\n### Avoid storing volatile or sensitive data\n\nBecause these files are plain text and likely committed to version control, they should not contain:\n\n- secrets\n- tokens\n- credentials\n- rapidly changing ephemeral state better suited to logs or runtime storage\n\n## Relationship to the rest of the codebase\n\nThis module connects to the broader system as a **context source**, not as executable infrastructure.\n\nIn practice, its role is likely to be:\n\n- a reference for how persistent memory should be structured\n- a source of durable context for agents or automation\n- a lightweight project knowledge base maintained alongside code\n\nHowever, there are no direct code-level integrations visible in this module itself. Any loader, reader, or prompt-construction logic that consumes these files would live elsewhere in the codebase.\n\n## Maintenance expectations\n\nThis module will remain simple unless the project later adds code to formalize memory handling. If that happens, likely future additions would include:\n\n- file discovery conventions\n- Markdown parsing\n- validation rules\n- merge/update policies\n- context-loading utilities\n\nAt present, none of that exists here. The module’s value comes from establishing a clear, minimal convention for persistent project memory.","other-plans":"# Other — plans\n\n# Other — plans\n\nThe `docs/plans/` module is the architectural contract for DarkFactory. 
It is not executable code, but it defines the system behaviors that implementation code is expected to preserve across phases.\n\nThis module matters because later code in `api/`, `lib/`, migrations, and worker orchestration is supposed to be derived from these documents rather than invented ad hoc. If you are changing runtime behavior, persistence, lifecycle transitions, or agent permissions, this is the place that explains what must remain true.\n\n## What lives here\n\nCurrent documents:\n\n- `docs/plans/00_cross_phase_invariants.md`\n- `docs/plans/01_mvp_implementation.md`\n\nThese serve different roles:\n\n- `00_cross_phase_invariants.md` defines non-negotiable rules that every later phase must obey\n- `01_mvp_implementation.md` turns those rules into a concrete MVP architecture and implementation sequence\n\nA useful way to read them is:\n\n1. Read `00_cross_phase_invariants.md` for constraints\n2. Read `01_mvp_implementation.md` for the first concrete system design built under those constraints\n\n## Module purpose\n\nThis module defines:\n\n- the durability model\n- the task and publication lifecycle model\n- the service boundary\n- the agent permission model\n- the git execution model\n- the expected repository structure\n- the phased implementation order\n\nIn practice, these docs are the source of truth for decisions like:\n\n- whether `tasks.status` can be reused for publication state\n- whether a planner agent may write files directly\n- whether recovery can resume from an unpushed clone\n- whether internal orchestration may call HTTP endpoints\n- whether multiple tasks may mutate the same branch concurrently\n- whether `workspace/` is generally writable\n\nThe answer to all of those is already specified here.\n\n## Relationship between the two documents\n\n```mermaid\nflowchart TD\n    A[00_cross_phase_invariants.md] --> B[01_mvp_implementation.md]\n    A --> C[Later phase plans]\n    B --> D[api/services]\n    B --> E[api/graphs and nodes]\n    B --> 
F[repositories and migrations]\n    B --> G[Docker and runtime setup]\n```\n\n`00_cross_phase_invariants.md` is the policy layer.  \n`01_mvp_implementation.md` is the first implementation blueprint.\n\nThe MVP plan may add detail, but it is not allowed to violate the invariants.\n\n---\n\n## `00_cross_phase_invariants.md`\n\nThis document defines the architectural rules that all planning docs in `docs/plans/` must preserve.\n\n### 1. Durability and recovery\n\nThe core durability split is:\n\n- **PostgreSQL** stores transactional and checkpointed state\n- **Git** stores evolving artifacts like plans and code\n\nImportant implications:\n\n- task clones are ephemeral\n- commits inside a clone are not durable until pushed\n- recovery happens only at durable boundaries\n- debug surfaces are not the recovery contract\n\nThis is one of the most important constraints in the entire planning set. It prevents implementation from treating local clone state, in-memory graph state, or operator-facing debug snapshots as durable truth.\n\nWhen implementing recovery logic in `api/services/tasks.py`, `api/services/execution.py`, or git helpers in `lib/git/client.py`, this invariant is the reason recovery must reconcile against persisted DB state and durable git refs rather than \"whatever was probably happening before the crash.\"\n\n### 2. Loop control vs. human governance\n\nThis document requires a single explicit control signal for feedback loops.\n\nThat means:\n\n- internal graph routing must be driven by structured state like `ReviewFeedback.status`\n- human feedback cannot silently override routing through prose alone\n- if human review blocks something, the plan must say whether it blocks execution routing or only publication\n\nThis directly supports the MVP distinction between:\n\n- graph-internal review outcomes\n- `tasks.status`\n- `publication_status`\n\n### 3. 
Agent write contract\n\nThis is the rule behind `ReadOnlyBackend` in the MVP plan.\n\nKey requirements:\n\n- read-only agents do not mutate files directly\n- if they author content, they return structured output or text to a wrapper\n- wrappers or services perform writes and commits\n- external side effects must go through service- or node-owned wrappers with durable bookkeeping\n\nThis is why the MVP describes planner and reviser roles as returning markdown content while node wrappers write `.darkfactory/plans/{task_id}.md` and commit it.\n\n### 4. Service boundary\n\nThe required layering is explicit:\n\n- **Routes -> Service -> Repository -> Database**\n\nAlso:\n\n- workers call services directly\n- internal orchestration must not call internal HTTP endpoints\n- route handlers should persist intent and return quickly\n- graph wrappers that map outcomes into `tasks.status`, `result_type`, and `publication_status` belong in service-owned orchestration\n\nThis is the architectural basis for `TaskService.create_task(...)` and `TaskService.run_task(...)` in the MVP plan.\n\n### 5. Isolation for parallel work\n\nMutable state must be isolated per task and per worker.\n\nThe invariant requires:\n\n- one clone per task\n- one clone per worker\n- isolated mutable state for any parallel coding strategy\n\nEven though the MVP is single-worker, this rule prevents future designs from sharing a checkout across concurrent coders.\n\n### 6. Repo authority\n\nDarkFactory normally operates against one configured authoritative target repository.\n\nThis rules out:\n\n- arbitrary per-task repo selection\n- implicit multi-repo execution\n- drifting follow-up tasks onto a different repo\n\nThis is why the MVP request body includes task context like `base_branch` and `relevant_files`, but not an arbitrary execution repository.\n\n### 7. 
Follow-up task uniqueness\n\nThis defines idempotency and branch exclusivity rules for later blocked-PR follow-up work.\n\nEven though Phase 01 does not implement follow-up tasks yet, this invariant matters because later phases must preserve:\n\n- uniqueness by triggering review event\n- exclusivity for active branch mutation\n\n### 8. Workspace resource governance\n\nThis section distinguishes between:\n\n- **authoring surfaces** like `workspace/prompts/` and `workspace/skills/`\n- **governed resources** like `workspace/memory/`, `workspace/config/`, and `workspace/HEARTBEAT.md`\n\nThe key rule is that `workspace/` is not generally writable.\n\nFor Phase 01 specifically:\n\n- prompts and skills are readable\n- direct task-scoped writes are deferred\n- governed resources must be updated only through trusted wrappers\n\nThis is why the MVP explicitly keeps `workspace/` read-only to normal task execution.\n\n### 9. Workspace reload contract\n\nThis defines reload semantics by resource type:\n\n- prompts load at invocation time\n- skills/configs reload on bounded cadence or explicit reload points\n- memory is read on demand\n- invalid runtime reloads keep the last known-good state\n\nThis is a planning-level contract for future implementation in config loaders and runtime refresh logic.\n\n### 10. Configuration maturity\n\nThis warns against over-generalizing configuration too early.\n\nThe practical guidance is:\n\n- use structured models early\n- avoid claiming config is \"trivial deserialization\" before contracts stabilize\n- prefer agent config before workflow-definition YAML\n\nThis is why the MVP uses `AgentConfig` Pydantic models hardcoded in Python instead of introducing YAML registries immediately.\n\n### 11. 
Task lifecycle state machine\n\nThis establishes that `tasks.status` must be a formal state machine.\n\nRules include:\n\n- later phases may extend but not redefine base states\n- terminal states do not transition further\n- transitions must be guarded in code\n- graph-internal state is separate from task lifecycle state\n\nThis is foundational for the MVP distinction between `PlanningState.status` and `tasks.status`.\n\n### 12. Execution vs. publication lifecycle\n\nThis is another critical invariant.\n\nExecution and publication must remain separate concerns:\n\n- execution answers whether the graph finished the work\n- publication answers whether the result was delivered durably\n\nIt also states:\n\n- if the branch is the durable execution artifact, execution is not fully successful until that branch exists durably\n- later publication stages remain separate\n- human PR review is publication-stage control by default\n- follow-up work should usually be modeled as a new task\n\nThis is the direct basis for the MVP `publication_status` field and the rule that a task is not `succeeded` until the branch push succeeds.\n\n### 12a. Task-internal session rotation\n\nThis is future-facing guidance for worker-session rotation inside a task.\n\nImportant constraints:\n\n- task lifecycle remains primary\n- handoff only at durable boundaries\n- summaries are descriptive, not authoritative\n- no implicit handoff if a worker dies before recording a terminal session outcome\n\nThis section does not affect MVP code directly, but it constrains future scheduler and orchestration work.\n\n### 13. Schema migration\n\nAll schema changes must use Alembic or equivalent.\n\nNo startup-time schema creation is allowed.\n\nThis aligns with the MVP plan's requirement for versioned migrations and startup schema verification.\n\n### 14. 
Default phasing\n\nThis defines the preferred implementation order across planning docs.\n\nIt places `01_mvp_implementation.md` first, followed by GitHub integration, verification, extensibility, observability, scaling, autonomy, and advanced loop intelligence.\n\n### 15. Prompt context trust boundary\n\nPrompt inputs are divided into three trust classes:\n\n- trusted workspace resources\n- trusted system-generated structured state\n- untrusted external-origin text\n\nThe key rule is that untrusted text may inform prompts but must not become workflow authority by prose alone.\n\nThis matters for any future prompt assembly code and for later GitHub/repository-context ingestion.\n\n---\n\n## `01_mvp_implementation.md`\n\nThis document is the concrete MVP blueprint. It translates the invariants into a specific architecture, data model, execution flow, and implementation sequence.\n\n## MVP scope\n\nThe MVP supports two task types:\n\n- `plan`\n- `implement`\n\nBoth use deterministic review loops.\n\n### Plan task\n\nFlow:\n\n- create branch\n- generate plan\n- review plan\n- revise until approved, rejected, or max iterations\n\n### Implement task\n\nFlow:\n\n- create branch\n- run planning loop\n- code against approved plan\n- review code\n- revise until approved, rejected, or max iterations\n\nThe MVP result is not a merged PR. It is a durable task branch pushed back to the authoritative repository.\n\n## Architectural model\n\nThe MVP plan defines DarkFactory as:\n\n- a FastAPI gateway\n- a service-owned orchestration layer\n- LangGraph graphs for planning and implementation loops\n- PostgreSQL for checkpoints and transactional records\n- Git for plans and code artifacts\n\nA central design choice is that LangGraph is used as a library, not as the platform boundary. 
DarkFactory owns its own API, lifecycle handling, and later scheduling.\n\n## Key implementation contracts in the MVP plan\n\n### Task creation and execution ownership\n\n`POST /api/v1/task` should:\n\n- validate input\n- persist intent\n- create the task row\n- return quickly\n\nIt should not:\n\n- clone repos\n- create branches\n- perform git mutations inline\n\nThose actions belong to worker-owned execution, coordinated through service-layer code.\n\n### Branch and clone model\n\nThe MVP uses:\n\n- one clone per task at `/work/clones/{task_id}/`\n- one branch per task, persisted as `tasks.branch_name`\n- canonical branch naming `darkfactory/{task_id}`\n\nThe branch is the durable artifact once pushed.\n\n### Plan storage model\n\nPlans are stored in git at:\n\n- `.darkfactory/plans/{task_id}.md`\n\nThey are not stored as the canonical artifact in PostgreSQL.\n\nReviewers inspect plan evolution using `git diff`, and plan history is the git log for that file.\n\n### Review storage model\n\nReviews and code reviews are stored in PostgreSQL `artifacts`.\n\nThe MVP requires provenance metadata for `review` and `code_review` artifacts:\n\n- `loop_stage`\n- `review_iteration`\n- `baseline_commit`\n- `reviewed_head_commit`\n\nThat provenance is part of the durability contract, especially for recovery and publication reconciliation.\n\n### Lifecycle separation\n\nThe MVP formalizes two separate state machines:\n\n- `tasks.status`\n- `publication_status`\n\n`tasks.status` tracks task lifecycle.  
\n`publication_status` tracks durable delivery of the branch.\n\nThis allows states like:\n\n- `status='failed'`\n- `result_type='approved'`\n- `publication_status='failed'`\n\nwhich means the graph approved the work, but DarkFactory failed to publish the branch durably.\n\n## Graph and state design\n\nThe MVP plan defines two graph state models:\n\n- `PlanningState`\n- `ImplementationState`\n\n`ImplementationState` extends `PlanningState` with coding-specific fields like:\n\n- `code_review`\n- `last_reviewed_commit`\n- `code_iteration`\n- `max_code_iterations`\n\nThe planning router is explicitly driven by `ReviewFeedback.status`, with max-iteration checks based on incremented iteration state.\n\nThis is important because it keeps routing deterministic and structured rather than inferred from prose.\n\n## Agent model\n\nThe MVP defines role-based agents through `AgentConfig`.\n\nRoles include:\n\n- planner\n- reviewer\n- reviser\n- coder\n- code reviewer\n\n### Factory boundary\n\nNodes do not call `create_deep_agent()` directly. They go through `api/agents/factory.py` via `create_agent(...)`.\n\nThat factory is the integration seam for:\n\n- backend resolution\n- prompt loading\n- tool resolution\n- future DeepAgents API changes\n\n### Read-only enforcement\n\nThe MVP explicitly documents `ReadOnlyBackend` as a DarkFactory-built wrapper around `FilesystemBackend`.\n\nThis exists because DeepAgents' built-in filesystem tools are present by default, and backend choice alone does not prevent file writes.\n\nThe wrapper must block:\n\n- `write()` / `awrite()`\n- `edit()` / `aedit()`\n- `upload_files()` / `aupload_files()`\n\nThis is a direct implementation of the cross-phase Agent Write Contract.\n\n### Node invocation pattern\n\nNodes follow a consistent pattern:\n\n1. marshal graph state into a prompt\n2. invoke the agent with `ainvoke(...)`\n3. extract text or `structured_response`\n4. 
return a state update dict\n\nThis keeps graph state separate from agent protocol details.\n\n## Persistence model\n\nThe MVP plan is explicit about storage boundaries.\n\n### PostgreSQL owns\n\n- LangGraph checkpoints\n- task metadata\n- review artifacts\n- result summaries\n\n### Git owns\n\n- plan files\n- code changes\n- branch history\n\nThis split is one of the strongest themes in the plan. If you are implementing a feature and are unsure where data belongs, this section is usually the answer.\n\n## Recovery model\n\nRecovery is based on durable boundaries only.\n\nThe MVP plan requires startup recovery to inspect `tasks.status='running'` and reconcile against:\n\n- LangGraph checkpoints\n- persisted task/artifact state\n- durable git branch state\n\nIt explicitly rejects pretending that unpushed clone state is recoverable if durability is insufficient.\n\nPublication recovery is reconciliation-first:\n\n- check whether the branch already exists durably at the expected commit\n- use persisted provenance like `reviewed_head_commit`\n- only push again if reconciliation cannot prove publication already happened\n\n## Observability boundary\n\nThe MVP includes a minimal observability baseline:\n\n- structured logs\n- `GET /api/v1/task/{id}/debug`\n- service-owned `result` artifacts\n- temporary failed-clone retention\n\nIt explicitly excludes:\n\n- durable event streams\n- WebSocket streaming\n- replay/event fan-out\n- cost/token aggregation\n\nThis is important because it prevents debug surfaces from becoming accidental control-plane state.\n\n## Git execution model\n\nThe MVP chooses clone-and-push rather than mutating the authoritative checkout directly.\n\nReasons documented in the plan:\n\n- host working tree stays untouched\n- no worktree/config pollution\n- crash leaves authoritative repo clean\n- pushed branches survive container failure\n- unpushed clone commits are correctly treated as non-durable\n\nThis section also defines git diff as first-class 
review context for both plans and code.\n\n## Docker and runtime assumptions\n\nThe MVP plan includes:\n\n- `Dockerfile`\n- `docker-compose.yml`\n- mounted target repo at `/work/target`\n- mounted `workspace/`\n- non-root runtime user\n- startup validation for DB, git, repo validity, and API keys\n\nThese are part of the implementation contract, not just deployment notes.\n\n## Exception and logging guidance\n\nThe plan also specifies:\n\n- a shared `DarkFactoryError` base in `lib/errors.py`\n- boundary-specific exception types in `lib/git/errors.py` and `api/errors.py`\n- route-level exception mapping to HTTP status codes\n- a singleton logger pattern in `lib/logger.py`\n\nThis gives contributors a consistent error and logging model before feature code expands.\n\n## Project structure contract\n\nThe MVP plan proposes a concrete source layout across:\n\n- `api/`\n- `lib/`\n- `workspace/`\n- `tests/`\n- `alembic/`\n\nThis is useful when adding new code because it clarifies ownership boundaries:\n\n- routes validate and dispatch\n- services own orchestration\n- repositories persist\n- nodes and graphs contain workflow logic\n- `lib/` holds shared lower-level utilities like git and config\n\n## Testing strategy\n\nThe plan defines a layered testing approach:\n\n- unit tests with mocked agent factory boundary\n- integration tests with stub agents driving full graphs\n- opt-in smoke tests with real LLM calls\n\nIt also specifies:\n\n- transaction-rollback DB fixtures\n- temporary git repo fixtures\n- no test-only production paths\n\nThis is the intended contribution model for new implementation work.\n\n## Atomic commit train\n\nThe final section of `01_mvp_implementation.md` is especially useful for contributors because it breaks the MVP into bisectable implementation steps.\n\nThe sequence starts with:\n\n1. bootstrap runtime skeleton\n2. schema and repository foundation\n3. task API and async execution wrapper\n4. git client and branch setup\n5. 
agent factory and read-only enforcement\n6. planner node and planning state\n7. review-revise loop with provenance\n8. coder and code reviewer workflow\n\nThis is not just project management detail. It encodes dependency order and architectural priorities:\n\n- persistence before orchestration\n- orchestration before git mutation\n- git mutation before agent-driven artifact generation\n- permission boundaries before role implementation\n- provenance before recovery\n\n---\n\n## How this module connects to the rest of the codebase\n\nAlthough this module has no runtime call graph, it is upstream of most implementation decisions.\n\n### `api/routes/*`\n\nThe plans define that routes:\n\n- validate requests\n- dispatch to services\n- return quickly\n- do not own orchestration or internal HTTP chaining\n\n### `api/services/*`\n\nThe plans place lifecycle ownership here:\n\n- task creation\n- execution orchestration\n- publication finalization\n- recovery handling\n- state-machine enforcement\n\n### `api/graphs/*` and `api/nodes/*`\n\nThe plans define:\n\n- graph topology\n- deterministic routing\n- node responsibilities\n- wrapper-owned writes and commits\n- structured review outputs\n\n### `api/agents/*`\n\nThe plans define:\n\n- `AgentConfig`\n- `create_agent(...)` as the DeepAgents seam\n- `ReadOnlyBackend`\n- invocation/extraction helpers\n- role-specific backend/tool expectations\n\n### `api/repositories/*` and Alembic migrations\n\nThe plans define:\n\n- `tasks` and `artifacts` responsibilities\n- lifecycle fields\n- provenance requirements\n- migration-first schema evolution\n\n### `lib/git/*`\n\nThe plans define:\n\n- clone-and-push execution\n- branch naming\n- plan path conventions\n- durability boundaries\n- cleanup expectations\n\n### `workspace/*`\n\nThe plans define:\n\n- prompt loading behavior\n- read-only vs governed surfaces\n- future reload semantics\n- trust boundaries for prompt assembly\n\n---\n\n## How to use these docs when contributing\n\n### 
When changing behavior\n\nUpdate the relevant plan if you are changing a contract, especially around:\n\n- lifecycle states\n- recovery guarantees\n- publication semantics\n- agent permissions\n- workspace mutability\n- repo authority\n- branch reuse/follow-up task behavior\n\n### When implementing code\n\nUse these docs to answer design questions before coding:\n\n- Should this state live in PostgreSQL or Git?\n- Is this a route concern or a service concern?\n- Can this agent write directly?\n- Is this debug data or durable recovery state?\n- Does this belong to execution lifecycle or publication lifecycle?\n\n### When reviewing PRs\n\nReview against the invariants first, then the MVP plan.\n\nA change may be locally reasonable but still violate the documented architecture if it:\n\n- collapses `tasks.status` and `publication_status`\n- lets read-only roles mutate files directly\n- treats clone state as durable\n- routes internal orchestration through HTTP\n- makes `workspace/` generally writable\n- introduces unguarded lifecycle transitions\n\n---\n\n## Practical reading order for contributors\n\nIf you are new to the codebase:\n\n1. Read `docs/plans/00_cross_phase_invariants.md`\n2. Read `docs/plans/01_mvp_implementation.md`\n3. 
Then inspect:\n   - `api/services/tasks.py`\n   - `api/services/execution.py`\n   - `api/graphs/planning.py`\n   - `api/graphs/implementation.py`\n   - `api/agents/factory.py`\n   - `api/agents/backends.py`\n   - `lib/git/client.py`\n\nThat sequence mirrors the architecture described here.\n\n## Summary\n\nThe `docs/plans/` module is the design contract for DarkFactory.\n\n- `00_cross_phase_invariants.md` defines what must remain true across all phases\n- `01_mvp_implementation.md` defines the first concrete architecture built under those rules\n\nThe most important themes across both documents are:\n\n- PostgreSQL and Git have distinct durability roles\n- execution and publication are separate lifecycles\n- service-layer orchestration owns lifecycle mutation\n- read-only agents do not write directly\n- clones are ephemeral; pushed branches are durable\n- `workspace/` is governed, not generally writable\n- structured state, not prose, controls routing\n\nIf implementation code diverges from those principles, it is diverging from this module.","other-prompts":"# Other — prompts\n\nThe `workspace/prompts/` module contains mutable prompt templates used by DarkFactory roles. 
These files are not executable code; they are instruction assets that shape how role-specific agents behave during planning, review, implementation, and revision steps.\n\nBecause these prompts directly influence repository changes and approval decisions, they function as part of the system’s operational logic even though they are plain Markdown files.\n\n## Purpose\n\nThis directory defines the behavioral contract for several DarkFactory roles:\n\n- `planner` — creates implementation plans\n- `reviewer` — reviews plans\n- `reviser` — revises plans after review\n- `coder` — implements approved plans\n- `code_reviewer` — reviews code changes against the approved plan\n- `PROMPT.example.md` — documents the intended use of the folder\n\nThese prompts are designed to keep each role narrowly scoped:\n\n- planning roles are read-only\n- implementation is limited to the task clone and current branch\n- review roles require structured output\n- verification is treated as a first-class gate for approval\n\n## Module Contents\n\n### `PROMPT.example.md`\n\nA lightweight example and directory-level note:\n\n- identifies this folder as the place for mutable prompt templates\n- indicates that prompts are referenced by DarkFactory configs\n- gives examples of prompt categories such as planner, reviewer, and feedback curator\n\nThis file is documentation-oriented rather than operational.\n\n### `planner.md`\n\nDefines the planner role.\n\nKey instructions:\n\n- operate in **read-only** mode\n- inspect repository structure and git history before planning\n- use safe git queries such as:\n  - `git log --oneline -- <path>`\n  - diffs against known refs\n- avoid guessing ancestor revisions like `HEAD~N` unless confirmed\n- produce a concise, implementation-ready Markdown plan\n\nRequired plan content:\n\n- clear goal\n- likely files or modules to change\n- ordered implementation steps\n- risks, assumptions, and validation needs\n\nOutput constraint:\n\n- return **only** the Markdown 
plan content for the task plan file\n\n### `reviewer.md`\n\nDefines the plan reviewer role.\n\nKey instructions:\n\n- operate in **read-only** mode\n- review the current task plan against the task description and acceptance criteria\n- on first review, inspect the full plan file\n- on later reviews, prefer the git diff for the plan file to focus on changes\n\nRequired structured output:\n\n- `status`: `APPROVED`, `NEEDS_REVISION`, or `REJECTED`\n- risk level\n- concise feedback\n- actionable suggestions when revisions are needed\n\n### `reviser.md`\n\nDefines the plan reviser role.\n\nKey instructions:\n\n- operate in **read-only** mode\n- use:\n  - the current plan\n  - reviewer feedback\n  - relevant repository context\n- optionally inspect git history and latest plan diff\n- use safe git queries\n- avoid unverified ancestor references like `HEAD~N`\n\nRequired output:\n\n- return **only** the full revised Markdown plan content\n- do not separately describe changes unless they belong inside the plan\n\nThis prompt is intentionally focused on producing a replacement plan document rather than commentary about the revision process.\n\n### `coder.md`\n\nDefines the implementation role.\n\nKey instructions:\n\n- has write access inside the task clone\n- has shell access for implementation work\n- implement the approved plan directly in the repository\n- keep changes focused on the requested task\n- use git history or diffs when helpful for consistency\n- when revising after code review, address feedback concretely\n\nImportant behavioral constraints:\n\n- deterministic verification is service-owned and runs automatically\n- when verification context is provided, fix only failures caused by modified code\n- if unrelated tests fail, note them in the summary instead of broadening scope\n- do **not** run `git commit`\n- DarkFactory creates the final commit\n- may inspect:\n  - `git status --short`\n  - `git diff`\n  - git history\n- must leave changes in the working 
tree\n- must not stop without editing the repository when code changes are required\n\nRequired output:\n\n- a short plain-language summary of implementation work completed\n\n### `code_reviewer.md`\n\nDefines the code review role for implementation changes.\n\nKey instructions:\n\n- operate in **read-only** mode\n- review implementation against:\n  - the approved plan\n  - repository context\n- use the plan on the task branch as the intent source\n- prefer git diff output to focus on relevant changes\n- require deterministic verification before approval\n- ensure verification covers the same `HEAD` commit under review\n- be skeptical of unnecessary or over-engineered changes\n- confirm implementation matches the plan, not just that it appears reasonable\n- use verification summary and bounded output excerpt to validate relevance\n\nRequired structured output:\n\n- `status`: `APPROVED`, `NEEDS_REVISION`, or `REJECTED`\n- risk level\n- concise feedback\n- actionable suggestions when revisions are needed\n\n## How the prompts work\n\nThese files are consumed as role instructions by the broader DarkFactory workflow. The module itself contains no code, no functions, and no internal call graph; its behavior comes from how external orchestration loads a prompt and uses it to guide an agent for a specific workflow step.\n\nAt a high level:\n\n1. A workflow step selects one of these prompt files.\n2. The selected prompt establishes permissions and expectations.\n3. The agent performs its role within those constraints.\n4. The agent returns output in the format required by the prompt.\n5. 
Downstream workflow stages use that output as input.\n\n## Role boundaries and workflow intent\n\nThe strongest design pattern in this module is explicit separation of responsibilities.\n\n### Read-only roles\n\nThese prompts explicitly prohibit repository edits:\n\n- `planner.md`\n- `reviewer.md`\n- `reviser.md`\n- `code_reviewer.md`\n\nThis keeps planning and review stages non-destructive and auditable.\n\n### Write-enabled role\n\nOnly `coder.md` authorizes repository modification. Even then, it narrows behavior:\n\n- edit only within the task clone\n- stay on the current task branch\n- remain scoped to the approved plan\n- do not create commits\n- leave final commit creation to DarkFactory\n\nThis separation reduces accidental scope creep and keeps implementation traceable to an approved plan.\n\n## Output contracts\n\nSeveral prompts define strict output shapes. These are important because downstream automation likely depends on predictable formatting.\n\n### Markdown-only plan outputs\n\nBoth planning prompts require document-only output:\n\n- `planner.md` → only the plan content\n- `reviser.md` → only the full revised plan content\n\nThis suggests the output is written directly into a task plan file without post-processing.\n\n### Structured review outputs\n\nBoth review prompts require the same high-level schema:\n\n- status\n- risk level\n- concise feedback\n- actionable suggestions when needed\n\nThis consistency makes plan review and code review easier to consume in a shared pipeline.\n\n### Plain-language implementation summary\n\n`coder.md` asks for a short summary rather than structured review metadata. 
That reflects its role: implementation output is primarily the repository diff, with the summary serving as a human-readable completion note.\n\n## Verification model\n\nA notable theme across the prompts is deterministic verification.\n\n### In `coder.md`\n\nVerification is described as:\n\n- service-owned\n- automatic after implementation\n- something the coder should respond to only when failures are caused by modified code\n\nThis prevents the implementation role from expanding scope to fix unrelated failures.\n\n### In `code_reviewer.md`\n\nVerification is elevated to an approval prerequisite:\n\n- approval requires passing verification\n- verification must correspond to the same `HEAD` commit being reviewed\n- verification summaries and bounded output excerpts should be checked for relevance\n\nTogether, these prompts enforce a workflow where implementation and approval are tied to concrete, reproducible evidence.\n\n## Git usage conventions\n\nSeveral prompts explicitly guide git usage. 
The recurring pattern is to prefer safe, targeted inspection over speculative history traversal.\n\nRecommended patterns include:\n\n- `git log --oneline -- <path>`\n- diffs against known refs\n- `git status --short`\n- `git diff`\n\nDiscouraged pattern:\n\n- guessing revisions like `HEAD~N` without confirming they exist\n\nThis is a practical safeguard for automation and ephemeral task branches, where history depth or branch shape may not match assumptions.\n\n## Relationship to the rest of the codebase\n\nThis module connects to the rest of the system through configuration and orchestration rather than imports or function calls.\n\n- DarkFactory configs reference these prompt files.\n- Workflow stages select prompts based on role.\n- Outputs from one role become inputs to later stages.\n- Repository state, git history, plan files, diffs, and verification results are the main external context these prompts operate on.\n\nA simplified workflow looks like this:\n\n```mermaid\nflowchart LR\n  T[Task] --> P[planner.md]\n  P --> R[reviewer.md]\n  R --> V[reviser.md]\n  V --> C[coder.md]\n  C --> Q[Verification]\n  Q --> CR[code_reviewer.md]\n```\n\nThis diagram is conceptual: the prompt files do not call each other directly, but they define the expected behavior at each stage.\n\n## Design characteristics\n\n### 1. Prompts are operational assets\n\nAlthough stored as Markdown, these files are effectively configuration-driven behavior definitions. Small wording changes can materially alter:\n\n- scope control\n- approval strictness\n- output format\n- verification expectations\n\n### 2. Prompts favor constrained autonomy\n\nEach role is given enough flexibility to inspect context and make decisions, but within explicit boundaries:\n\n- read-only vs write access\n- required output formats\n- scoped git usage\n- plan adherence\n- verification requirements\n\n### 3. 
Prompts encode process policy\n\nThe module captures workflow policy in natural language, including:\n\n- approval gates\n- revision expectations\n- implementation scope discipline\n- evidence-based review\n- separation between planning, coding, and review\n\n## Contributing and modifying prompts\n\nWhen editing files in this module, preserve the parts that downstream automation likely depends on.\n\n### Keep stable\n\n- role identity statements such as `You are the DarkFactory planner.`\n- explicit permission boundaries like “read-only” or “write access”\n- required output formats\n- enumerated status values:\n  - `APPROVED`\n  - `NEEDS_REVISION`\n  - `REJECTED`\n- instructions about verification and commit ownership\n\n### Change carefully\n\n- wording that affects scope or authority\n- output phrasing that may be parsed or expected by other tooling\n- git guidance that protects against unsafe assumptions\n- review criteria that determine approval thresholds\n\n### Good prompt changes\n\nGood changes usually:\n\n- clarify ambiguity\n- tighten scope\n- improve consistency across roles\n- reinforce existing workflow guarantees\n- reduce chances of unrelated edits or invalid approvals\n\n### Risky prompt changes\n\nRisky changes include:\n\n- allowing write behavior in read-only roles\n- weakening verification requirements\n- removing output structure\n- encouraging broad repository changes\n- making plan adherence optional\n- changing status vocabulary without updating consumers\n\n## Practical reading guide\n\nIf you are new to this module:\n\n- read `PROMPT.example.md` for directory intent\n- read `planner.md`, `reviewer.md`, and `reviser.md` together to understand the planning loop\n- read `coder.md` to understand implementation boundaries\n- read `code_reviewer.md` to understand final approval expectations\n\nIf you are debugging workflow behavior:\n\n- check whether the selected prompt matches the intended role\n- verify that required output shape still 
matches what downstream systems expect\n- inspect recent prompt edits for changes to permissions, verification language, or status values\n- compare `coder.md` and `code_reviewer.md` when implementation and approval behavior seem inconsistent\n\n## Summary\n\n`workspace/prompts/` is the role-instruction layer for DarkFactory’s task workflow. It contains Markdown prompt templates that define:\n\n- who can edit code\n- who must remain read-only\n- how plans are produced and revised\n- how reviews are structured\n- how verification affects approval\n- how tightly implementation must stay aligned with the approved plan\n\nThere is no executable logic in this module, but it is still a critical part of the system because it governs how automated roles interact with the repository and with each other.","other-pyproject-toml":"# Other — pyproject.toml\n\n# `pyproject.toml`\n\nProject-level packaging, dependency, and test configuration for the `darkfactory` codebase.\n\nThis file is the canonical source for how the project is built, what it depends on, which Python versions it supports, which packages are included in distributions, and how pytest should run the test suite. 
It does not contain executable application logic, but it directly shapes how the rest of the repository is installed and developed.\n\n## Purpose\n\n`pyproject.toml` defines:\n\n- the build backend used to create source and wheel distributions\n- project metadata such as package name, version, and Python requirement\n- runtime dependencies required by the application\n- optional development dependencies used for testing\n- package discovery rules for setuptools\n- pytest defaults for async tests and integration markers\n\nFor contributors, this file is the first place to check when:\n\n- adding or upgrading dependencies\n- changing supported Python versions\n- exposing new top-level packages in distributions\n- configuring test behavior\n- preparing the project for packaging or publishing\n\n## Build configuration\n\n```toml\n[build-system]\nrequires = [\"setuptools>=69\", \"wheel\"]\nbuild-backend = \"setuptools.build_meta\"\n```\n\n### What this means\n\nThe project uses the standard PEP 517 build interface with `setuptools` as the build backend.\n\n- `setuptools>=69` provides packaging and package discovery behavior\n- `wheel` enables wheel builds\n- `setuptools.build_meta` is the backend invoked by tools such as `pip`, `build`, and other packaging frontends\n\n### Practical effect\n\nWhen someone installs the project from source, tooling reads this section first to create an isolated build environment. 
That environment must contain `setuptools` and `wheel` before the package can be built.\n\n## Project metadata\n\n```toml\n[project]\nname = \"darkfactory\"\nversion = \"0.1.0\"\ndescription = \"DarkFactory runtime bootstrap.\"\nreadme = \"AGENTS.md\"\nrequires-python = \">=3.12\"\n```\n\n### Fields\n\n#### `name`\n\nThe installable distribution name is `darkfactory`.\n\nThis is the name package managers and build tools use, even though the importable Python packages are discovered separately via setuptools.\n\n#### `version`\n\nCurrent version is `0.1.0`.\n\nThis is a static version string. There is no dynamic versioning configured here.\n\n#### `description`\n\nA short summary of the project:\n\n> DarkFactory runtime bootstrap.\n\nThis suggests the repository is focused on startup/runtime orchestration rather than being a generic library.\n\n#### `readme`\n\nThe project metadata points to `AGENTS.md` as the long-form package readme.\n\nThat means packaging tools will use `AGENTS.md` as the project description source instead of the more common `README.md`.\n\n#### `requires-python`\n\n```toml\nrequires-python = \">=3.12\"\n```\n\nThe codebase explicitly targets Python 3.12 and newer.\n\nThis matters for contributors because:\n\n- syntax and standard library usage may assume 3.12 features\n- CI and local environments should use Python 3.12+\n- dependency resolution will reject older interpreters\n\n## Runtime dependencies\n\n```toml\ndependencies = [\n    \"deepagents~=0.4.12\",\n    \"langchain-openai~=1.1.12\",\n    \"fastapi~=0.135.3\",\n    \"uvicorn[standard]~=0.42.0\",\n    \"httpx~=0.28.1\",\n    \"asyncpg~=0.31.0\",\n    \"psycopg[binary]~=3.2.12\",\n    \"langgraph~=1.1.4\",\n    \"langgraph-checkpoint-postgres~=3.0.5\",\n    \"pydantic~=2.12.5\",\n    \"pydantic-settings~=2.13.1\",\n    \"alembic~=1.18.4\",\n    \"psycopg2-binary~=2.9.11\",\n]\n```\n\nThese are installed for normal application use.\n\n### Dependency groups by role\n\n#### API and server 
runtime\n\n- `fastapi~=0.135.3`\n- `uvicorn[standard]~=0.42.0`\n\nThese indicate the project exposes an HTTP API and is expected to run under Uvicorn. `uvicorn[standard]` pulls in common production/runtime extras such as improved event loop and HTTP parser support where available.\n\n#### HTTP client integration\n\n- `httpx~=0.28.1`\n\nUsed for outbound HTTP calls, likely in async contexts given the rest of the stack.\n\n#### Data validation and settings\n\n- `pydantic~=2.12.5`\n- `pydantic-settings~=2.13.1`\n\nThese support typed models and environment/configuration loading. In a FastAPI-based codebase, these are typically foundational for request/response schemas and application settings.\n\n#### PostgreSQL access and migrations\n\n- `asyncpg~=0.31.0`\n- `psycopg[binary]~=3.2.12`\n- `psycopg2-binary~=2.9.11`\n- `alembic~=1.18.4`\n\nThis combination suggests the project interacts with PostgreSQL in multiple ways:\n\n- `asyncpg` for async database access\n- `psycopg` v3 for modern PostgreSQL connectivity\n- `psycopg2-binary` likely for compatibility with tooling or libraries that still expect psycopg2\n- `alembic` for schema migrations\n\nHaving both `psycopg` and `psycopg2-binary` is notable. 
Contributors should avoid removing one without checking migration tooling, adapters, or integration libraries that may depend on a specific driver.\n\n#### Agent / LLM / graph orchestration\n\n- `deepagents~=0.4.12`\n- `langchain-openai~=1.1.12`\n- `langgraph~=1.1.4`\n- `langgraph-checkpoint-postgres~=3.0.5`\n\nThese dependencies indicate the application likely includes agent execution and graph-based orchestration, with PostgreSQL-backed checkpointing/persistence.\n\n### Version specifier strategy\n\nAll dependencies use the compatible release operator `~=`.\n\nExamples:\n\n- `fastapi~=0.135.3`\n- `pydantic~=2.12.5`\n\nThis allows patch-level updates within the same compatible minor line while preventing broader upgrades that may introduce breaking changes.\n\nFor contributors, this means:\n\n- dependency updates are intentionally conservative\n- if you need a newer minor version, update `pyproject.toml` explicitly\n- lockfile behavior, if used elsewhere, should align with these constraints\n\n## Optional development dependencies\n\n```toml\n[project.optional-dependencies]\ndev = [\n    \"pytest~=9.0.2\",\n    \"pytest-asyncio~=1.3.0\",\n]\n```\n\nThe `dev` extra defines tooling needed for local development and testing.\n\n### Included tools\n\n- `pytest~=9.0.2` — test runner\n- `pytest-asyncio~=1.3.0` — async test support\n\n### Installing with development tools\n\nTypical install pattern:\n\n```bash\npip install -e .[dev]\n```\n\nThis installs the package in editable mode along with the test dependencies.\n\n## Package discovery\n\n```toml\n[tool.setuptools.packages.find]\ninclude = [\"api*\", \"lib*\"]\n```\n\nSetuptools is configured to discover and include only packages whose names match:\n\n- `api*`\n- `lib*`\n\n### Why this matters\n\nOnly packages under those namespaces are included in built distributions.\n\nIf you add a package whose name matches one of those patterns, such as:\n\n- `api`\n- `api.routes`\n- `lib`\n- `lib.db`\n\nit will be included automatically.\n\nIf you 
add a new top-level package outside those prefixes, such as:\n\n- `scripts`\n- `workers`\n- `core`\n\nit will **not** be packaged unless this configuration is updated.\n\n### Contributor guidance\n\nWhen adding new code intended for distribution:\n\n- prefer placing it under `api` or `lib`, or\n- update `[tool.setuptools.packages.find]` to include the new namespace\n\nThis setting affects packaging, not necessarily local imports from the repository root. Code may appear to work locally while still being omitted from built artifacts if it falls outside the included package patterns.\n\n## Pytest configuration\n\n```toml\n[tool.pytest.ini_options]\nasyncio_mode = \"auto\"\ntestpaths = [\"tests\"]\nmarkers = [\n    \"integration: requires a disposable PostgreSQL database\",\n]\n```\n\nThis section centralizes pytest defaults.\n\n### `asyncio_mode = \"auto\"`\n\nIn `auto` mode, `pytest-asyncio` treats every `async def` test function as an asyncio test and takes ownership of async fixtures, so tests do not need an explicit `@pytest.mark.asyncio` marker.\n\nThis reduces boilerplate for async tests. The option is provided by the `pytest-asyncio` plugin rather than by pytest itself, so it requires `pytest-asyncio` to be installed.\n\n### `testpaths = [\"tests\"]`\n\nPytest will look for tests under the `tests` directory by default.\n\nThis keeps discovery focused and avoids accidentally collecting non-test modules elsewhere in the repository.\n\n### Custom marker: `integration`\n\n```toml\n\"integration: requires a disposable PostgreSQL database\"\n```\n\nThe project declares an `integration` marker for tests that depend on a disposable PostgreSQL instance.\n\nThis is important because:\n\n- it documents that some tests require external infrastructure\n- it prevents unknown-marker warnings\n- it gives contributors a clear way to separate fast unit tests from DB-backed integration tests\n\nTypical usage in tests would be:\n\n```python\nimport pytest\n\n@pytest.mark.integration\nasync def test_something_against_postgres():\n    ...\n```\n\nTypical selection patterns:\n\n```bash\npytest\npytest -m integration\npytest -m \"not integration\"\n```\n\n## How this file connects to the rest of the 
codebase\n\nAlthough `pyproject.toml` has no internal call graph, it is upstream of nearly every developer workflow:\n\n```mermaid\nflowchart TD\n    A[pyproject.toml] --> B[Build tooling]\n    A --> C[Dependency installation]\n    A --> D[Package discovery]\n    A --> E[Pytest configuration]\n    C --> F[api* packages]\n    C --> G[lib* packages]\n    E --> H[tests/]\n```\n\n### Build and install path\n\nPackaging tools read `pyproject.toml` to determine:\n\n- how to build the project\n- which dependencies to install\n- which packages to include in the distribution\n\n### Runtime implications\n\nThe dependency list strongly suggests the rest of the repository contains:\n\n- FastAPI application code under `api*`\n- shared/runtime/database logic under `lib*`\n- PostgreSQL-backed persistence and migrations\n- agent or graph orchestration components using LangGraph and related libraries\n\n### Test implications\n\nPytest configuration here determines how contributors run and categorize tests across the repository, especially async and PostgreSQL-backed integration tests.\n\n## Common maintenance tasks\n\n### Add a new runtime dependency\n\nAdd it under `[project].dependencies`:\n\n```toml\ndependencies = [\n    ...\n    \"new-package~=1.2.3\",\n]\n```\n\nUse a version constraint consistent with the existing strategy unless there is a reason to do otherwise.\n\n### Add a development-only dependency\n\nAdd it under `[project.optional-dependencies].dev`:\n\n```toml\ndev = [\n    ...\n    \"ruff~=0.x.y\",\n]\n```\n\nThen install with:\n\n```bash\npip install -e .[dev]\n```\n\n### Add a new packaged namespace\n\nIf new code lives outside `api*` or `lib*`, update:\n\n```toml\n[tool.setuptools.packages.find]\ninclude = [\"api*\", \"lib*\", \"newnamespace*\"]\n```\n\n### Change supported Python versions\n\nUpdate:\n\n```toml\nrequires-python = \">=3.12\"\n```\n\nBe sure the rest of the code and dependency set remain compatible.\n\n### Add a new pytest marker\n\nExtend the 
`markers` list so pytest recognizes it and contributors understand its purpose.\n\nExample:\n\n```toml\nmarkers = [\n    \"integration: requires a disposable PostgreSQL database\",\n    \"slow: long-running tests\",\n]\n```\n\n## Things to watch for\n\n### Dual PostgreSQL drivers\n\nThe presence of both `psycopg[binary]` and `psycopg2-binary` should be treated as intentional unless proven otherwise. Before removing either one, verify:\n\n- Alembic configuration\n- ORM or migration integrations\n- any third-party libraries expecting a specific driver\n\n### Packaging gaps from namespace filtering\n\nBecause only `api*` and `lib*` are included, new top-level packages whose names do not match those patterns are silently excluded from distributions.\n\n### Python 3.12 requirement\n\nAny tooling, CI jobs, Docker images, or local virtual environments must satisfy the `>=3.12` constraint.\n\n### Readme source\n\nThe package metadata uses `AGENTS.md`. If packaging or publishing workflows expect `README.md`, they must be updated accordingly rather than assuming the default.\n\n## Summary\n\n`pyproject.toml` is the repository’s packaging and tooling contract. 
It defines:\n\n- how `darkfactory` is built\n- which Python versions are supported\n- which runtime and development dependencies are installed\n- which package namespaces are distributed\n- how pytest discovers and runs tests\n\nFor most structural changes to the project—new dependencies, new package roots, new test conventions, or Python version changes—this file is one of the primary places that must be updated.","other-skills":"# Other — skills\n\n# Other — skills\n\nThe `workspace/skills` module defines the repository’s skill format: lightweight, self-describing Markdown files that package reusable instructions for the agent.\n\nIn this codebase, the module is represented by a single example file:\n\n- `workspace/skills/SKILL.example.md`\n\nAlthough there is no executable code in this module, it establishes an important content contract for how skills are authored, discovered, and maintained.\n\n## Purpose\n\nA skill is a reusable instruction bundle for a specific tool, workflow, or domain task. Instead of embedding the same guidance repeatedly in prompts or task-specific logic, contributors can define a skill once and reuse it whenever the agent needs that capability.\n\nThe example file exists to show:\n\n- the expected file structure\n- the required metadata shape\n- the style of the human-readable instructions\n- the intended scope of a skill\n\nThis module is documentation-driven rather than code-driven: its behavior comes from convention and from whatever external system reads these skill files.\n\n## File Overview\n\n## `SKILL.example.md`\n\nThis file is a template-like example of a skill document. It contains two main parts:\n\n1. frontmatter metadata\n2. 
Markdown body content\n\n### Frontmatter\n\nThe file begins with YAML frontmatter:\n\n```md\n---\nname: example-skill\ndescription: Describe when this skill should be used.\nbins: []\nenv: []\nalways_load: false\n---\n```\n\nThese fields define the machine-readable identity and loading characteristics of the skill.\n\n#### `name`\n\n```yaml\nname: example-skill\n```\n\nA unique identifier for the skill.\n\nUse this as the canonical name for the skill. It should be stable, concise, and descriptive enough to distinguish it from other skills.\n\n#### `description`\n\n```yaml\ndescription: Describe when this skill should be used.\n```\n\nA short summary of when the skill applies.\n\nThis is the most important discovery field. It should help a caller or loader decide whether the skill is relevant to the current task.\n\nGood descriptions are:\n\n- task-oriented\n- specific\n- short enough to scan quickly\n\n#### `bins`\n\n```yaml\nbins: []\n```\n\nA list of command-line tools or binaries associated with the skill.\n\nThis field is empty in the example, but in a real skill it likely communicates tool dependencies or expected command availability. For example, a skill centered on Git, Docker, or Terraform would likely declare those tools here.\n\n#### `env`\n\n```yaml\nenv: []\n```\n\nA list of environment variables associated with the skill.\n\nThis field is also empty in the example. 
In practice, it can document runtime prerequisites such as API keys, configuration variables, or environment-specific settings needed to execute the workflow described by the skill.\n\n#### `always_load`\n\n```yaml\nalways_load: false\n```\n\nControls whether the skill should be loaded unconditionally.\n\nThe example sets this to `false`, which implies the default behavior is selective loading based on relevance rather than automatic inclusion.\n\n### Markdown Body\n\nAfter the frontmatter, the file defines the human-readable content of the skill:\n\n```md\n# Example Skill\n\nUse this skill when the agent needs repeatable instructions for a specific tool,\nworkflow, or domain task.\n\n## Steps\n\n1. Explain the goal briefly.\n2. List the required commands, tools, or checks.\n3. Include examples the agent can follow directly.\n```\n\nThis body is the actual operational guidance the agent would use.\n\nThe example demonstrates a simple pattern:\n\n- a title\n- a short statement of applicability\n- a procedural section with concrete steps\n\n## How the Module Works\n\nThere are no functions, classes, or internal calls in this module. Its “execution model” is content interpretation.\n\nA consumer of this module would typically:\n\n1. read the Markdown file\n2. parse the YAML frontmatter\n3. use metadata such as `name`, `description`, and `always_load` to decide whether to load the skill\n4. 
present or apply the Markdown body as reusable instructions\n\nBecause the call graph for this module shows:\n\n- **Internal calls:** None\n- **Outgoing calls:** None\n- **Incoming calls:** None\n- **Execution flows:** None detected\n\nthis module should be understood as a static content definition rather than an active runtime component.\n\n## Conceptual Flow\n\n```mermaid\nflowchart TD\n    A[Skill file] --> B[YAML frontmatter]\n    A --> C[Markdown instructions]\n    B --> D[Selection/loading decision]\n    C --> E[Agent follows reusable workflow]\n```\n\nThis diagram is conceptual rather than code-level: it reflects how the file is intended to be consumed.\n\n## Key Components and Authoring Pattern\n\nThe example establishes a repeatable authoring pattern for all skills.\n\n### 1. Metadata first\n\nEvery skill should begin with frontmatter. This makes the file both human-readable and machine-readable.\n\nAt minimum, contributors should preserve the same field structure shown in `SKILL.example.md` unless the broader system explicitly supports additional fields.\n\n### 2. Clear applicability statement\n\nThe opening paragraph should explain when to use the skill. This complements the `description` field by giving slightly more context in natural language.\n\n### 3. Actionable steps\n\nThe `## Steps` section is the core of the skill. It should contain instructions that are:\n\n- repeatable\n- concrete\n- easy to follow without extra interpretation\n- directly useful during task execution\n\n### 4. Examples where helpful\n\nThe example explicitly recommends including examples the agent can follow directly. 
This is especially important for command-heavy or multi-step workflows.\n\n## Relationship to the Rest of the Codebase\n\nThis module connects to the rest of the codebase through convention, not direct imports or calls.\n\n### What it provides\n\nIt provides a standard format for reusable operational knowledge.\n\n### What it does not provide\n\nIt does not provide:\n\n- executable logic\n- helper functions\n- runtime orchestration\n- validation code within this module itself\n\n### Likely integration points\n\nEven though no call graph edges are present, other parts of the system may conceptually rely on this module to:\n\n- discover available skills\n- decide which skills to load for a task\n- surface instructions to the agent at runtime\n- document tool and environment prerequisites\n\nThose integrations are external to this module and are not defined in the provided source.\n\n## Contributing\n\nWhen adding a new skill, follow the structure demonstrated by `SKILL.example.md`.\n\n### Recommended checklist\n\n- Choose a stable `name`\n- Write a precise `description`\n- Populate `bins` with required command-line tools\n- Populate `env` with required environment variables\n- Set `always_load` only when unconditional loading is justified\n- Add a clear title and usage statement\n- Write step-by-step instructions\n- Include examples for commands, checks, or expected outputs when useful\n\n### Writing guidance\n\nPrefer skills that are:\n\n- narrowly scoped\n- reusable across tasks\n- explicit about prerequisites\n- procedural rather than abstract\n\nAvoid skills that are:\n\n- too broad to apply consistently\n- missing tool or environment assumptions\n- vague about expected actions\n- duplicative of existing skills\n\n## Example Template\n\nA contributor can use the example file as a starting point:\n\n```md\n---\nname: my-skill\ndescription: When to use this skill.\nbins: [tool1, tool2]\nenv: [REQUIRED_VAR]\nalways_load: false\n---\n\n# My Skill\n\nUse this skill 
when the task requires this specific workflow.\n\n## Steps\n\n1. State the objective.\n2. Run or verify the required tools.\n3. Perform the workflow in order.\n4. Validate the result.\n5. Include a concrete example if possible.\n```\n\n## Maintenance Notes\n\nBecause this module is convention-based, consistency matters more than implementation detail.\n\nWhen reviewing changes:\n\n- verify frontmatter remains valid YAML\n- ensure the `description` matches the body content\n- confirm `bins` and `env` reflect actual prerequisites\n- check that instructions are actionable and testable\n- keep the skill focused on one repeatable capability\n\n## Summary\n\nThe `workspace/skills` module is a content schema for reusable agent skills. `SKILL.example.md` serves as the canonical example of that schema, combining structured metadata with procedural Markdown instructions. While it has no runtime logic of its own, it is an important extension point for organizing repeatable workflows and making them discoverable across the broader system.","other-tests":"# Other — tests\n\n# Other — tests\n\nThe `tests/` module is the project’s executable specification. It verifies behavior across configuration, agent construction, graph execution, Git/GitHub integration, verification, logging, and application startup.\n\nThis test suite is not just unit coverage for isolated helpers. 
It also defines important system contracts:\n\n- what startup accepts or rejects\n- which agent roles are read-only vs shell-capable\n- how prompts are assembled\n- how task execution resumes after restart\n- how Git and GitHub failures are surfaced\n- what must and must not appear in logs\n- how integration tests isolate PostgreSQL state\n\n## Scope\n\nThe visible test files cover:\n\n- shared fixtures and disposable infrastructure in `tests/conftest.py`\n- agent backend, factory, registry, middleware, and invocation behavior\n- app startup and task recovery behavior\n- boundary logging for Git and verification\n- coder and code reviewer prompt/node behavior\n- configuration validation\n- error hierarchy guarantees\n- execution scope and GitHub context injection\n- Git client and Git tool behavior\n- GitHub client transport behavior\n- GitHub comment upsert behavior\n- additional graph/API/repository-context/verification tests referenced by the call graph\n\n## Test architecture\n\nThe suite mixes three styles:\n\n1. **Pure unit tests**\n   - validate small helpers and model behavior\n   - use `monkeypatch`, fake agents, fake subprocesses, and fake HTTP transports\n\n2. **Filesystem/Git integration tests**\n   - create real temporary repositories\n   - run real `git` commands\n   - verify clone, branch, commit, push, and cleanup behavior\n\n3. 
**Database-backed integration tests**\n   - provision disposable PostgreSQL databases\n   - run Alembic migrations\n   - execute app lifespan and graph recovery flows against real persistence\n\n## Shared fixtures and infrastructure\n\n## `tests/conftest.py`\n\n`conftest.py` provides the common environment, repository, and database fixtures used throughout the suite.\n\n### Environment fixtures\n\n#### `clean_runtime_env`\nRemoves DarkFactory-specific environment variables listed in `TEST_ENV_VARS`.\n\nUse this when testing `Settings` or any code path that reads process environment and should not inherit developer machine state.\n\n#### `workspace_root`\nCreates the minimum workspace structure expected by settings and prompt-loading code:\n\n- `prompts/`\n- `skills/`\n- `memory/`\n\nThis fixture is intentionally minimal. Tests that need prompt files or skills create them explicitly.\n\n#### `target_repo_path`\nCreates a placeholder directory that is *not* a Git repository. Used for startup validation tests.\n\n### Git repository fixtures\n\n#### `bare_git_repo_path`\nCreates a bare repository with:\n\n```bash\ngit init --bare <path>\n```\n\nUsed to verify startup accepts a valid bare target repo.\n\n#### `checked_out_git_repo_path`\nCreates a normal checked-out repository with:\n\n```bash\ngit init <path>\n```\n\nUsed to verify startup rejects checked-out repos unless `receive.denyCurrentBranch=updateInstead` is configured.\n\n#### `SeededGitRepo`\nA frozen dataclass containing:\n\n- `bare_repo_path`\n- `source_repo_path`\n- `default_branch`\n\n#### `seeded_git_repo`\nBuilds a realistic origin/source pair:\n\n- initializes a bare origin repo\n- initializes a working repo on branch `main`\n- configures Git user identity\n- commits `README.md`\n- pushes `main` to origin\n- updates bare repo `HEAD` to `refs/heads/main`\n\nThis fixture is the foundation for Git client, Git tool, branch setup, and app recovery tests.\n\n### Database fixtures\n\nThe database fixtures 
create isolated PostgreSQL databases per test session or per test, depending on the use case.\n\n#### `integration_database_url`\nReads the base database URL from:\n\n- `TEST_DATABASE_URL`, or\n- `DATABASE_URL`\n\nIf neither is set, integration tests are skipped.\n\n#### `integration_database_admin_url`\nReturns the admin connection string used for `CREATE DATABASE` / `DROP DATABASE`.\n\nIf `TEST_DATABASE_ADMIN_URL` is not set, it rewrites the database name in `integration_database_url` to `postgres` using `_replace_database_name`.\n\n#### `test_database_url`\nSession-scoped fixture that:\n\n1. generates a unique database name\n2. creates the database via `_create_database`\n3. yields a connection URL pointing at that database\n4. drops the database in teardown via `_drop_database`\n\n#### `outdated_database_url`\nLike `test_database_url`, but intentionally leaves the database empty and unmigrated. Used for schema validation tests.\n\n#### `migrated_test_database_url`\nRuns:\n\n```python\ncommand.upgrade(get_alembic_config(test_database_url), \"head\")\n```\n\nThis ensures integration tests run against the current schema.\n\n#### `db_connection`\nAsync fixture that wraps each test in a rollback transaction using `asyncpg`:\n\n- connects to `migrated_test_database_url`\n- starts a transaction\n- yields the connection\n- rolls back after the test\n- closes the connection\n\nThis gives test-level isolation without recreating the database for every test.\n\n### Internal helpers\n\n#### `_create_database(admin_url, database_name)`\nUses `psycopg2` with autocommit to execute:\n\n```sql\nCREATE DATABASE <identifier>\n```\n\n#### `_drop_database(admin_url, database_name)`\nTerminates active connections with `pg_terminate_backend` and then drops the database.\n\n#### `_replace_database_name(database_url, database_name)`\nRewrites only the path portion of a database URL using `urlsplit` / `urlunsplit`.\n\n#### `_run_git(*args)`\nThin wrapper 
around:\n\n```python\nsubprocess.run([\"git\", *args], check=True)\n```\n\nUsed by `seeded_git_repo`.\n\n## Execution flow for integration database setup\n\n```mermaid\nflowchart TD\n    A[integration_database_url] --> B[integration_database_admin_url]\n    A --> C[test_database_url]\n    B --> C\n    C --> D[_create_database]\n    C --> E[migrated_test_database_url]\n    E --> F[Alembic upgrade head]\n    E --> G[db_connection]\n    C --> H[_drop_database]\n```\n\nThis is the core lifecycle for database-backed tests.\n\n---\n\n## Agent-related tests\n\n## `tests/test_agent_backends.py`\n\nThese tests define the contract for `api.agents.backends.ReadOnlyBackend`.\n\n### What is verified\n\n`ReadOnlyBackend` must:\n\n- allow reads\n- block writes\n- block edits\n- block uploads\n- preserve on-disk state\n- behave the same for sync and async APIs\n\n### Covered methods\n\nSync path:\n\n- `read`\n- `write`\n- `edit`\n- `upload_files`\n\nAsync path:\n\n- `aread`\n- `awrite`\n- `aedit`\n- `aupload_files`\n\n### Important assertions\n\n- write operations return `READ_ONLY_WRITE_ERROR`\n- edit operations return `READ_ONLY_EDIT_ERROR`\n- uploads return `\"permission_denied\"`\n- no blocked file is created on disk\n\nThese tests are important because agent role safety depends on backend enforcement, not just prompt instructions.\n\n---\n\n## `tests/test_agent_factory.py`\n\nThis file is the main specification for agent construction.\n\n### Local test helpers\n\n#### `MockStructuredResponse`\nA simple `pydantic.BaseModel` used to verify structured response wiring.\n\n#### `MockAgent`\nCaptures `ainvoke` calls and returns a predefined result.\n\n#### `RaisingAgent`\nRaises `RuntimeError(\"boom\")` from `ainvoke` to test failure logging.\n\n#### `write_rule_file`\nCreates `.cursor/rules/<file>` content under a repo root.\n\n#### `build_repo_context`\nBuilds a `RepositoryContext` from `(relative_path, content)` tuples.\n\n#### `build_tool_registration`\nCreates a 
`ToolRegistration` with a `ToolConfig` and a materializer returning a `ToolAdapter`.\n\nThis helper is used to verify tool registry materialization and permission scoping without depending on production tool implementations.\n\n### `create_agent` behavior under test\n\nThe tests verify that `api.agents.factory.create_agent`:\n\n- loads prompt templates from `workspace_root / \"prompts\"`\n- appends repository context when provided\n- treats repository context as advisory\n- materializes tools from the registry\n- chooses the correct backend by role\n- passes `response_format` only when requested\n- omits optional kwargs when not needed\n- publishes model API keys from settings into environment\n- uses fresh repository context on each call\n- omits repository context entirely when absent\n\n### Backend selection rules\n\nThe tests establish a role/capability boundary:\n\n- planner/reviewer-style filesystem agents use `ReadOnlyBackend`\n- coder-style shell agents use `LocalShellBackend`\n\n### Middleware rules\n\n`create_agent` must attach `DisableSubagentToolMiddleware` for review-only roles such as:\n\n- `reviewer`\n- `code_reviewer`\n\nIt must *not* attach that middleware for shell-capable agents like `coder`.\n\n### Registry-based construction\n\nThe suite verifies both:\n\n- direct config construction via `create_agent`\n- lookup-based construction via `create_registered_agent`\n\nIt also verifies that registry updates are respected, not cached incorrectly.\n\n### Skill agent construction\n\n`create_skill_agent` is tested to ensure it routes through the same shared factory behavior while allowing:\n\n- `skills` override\n- `backend_override`\n- `tool_adapters_override`\n\n### Invocation helpers\n\nThe tests for `api.agents.invoke` define the message/result contract:\n\n#### `invoke_agent`\nMust call `agent.ainvoke` with:\n\n- a `HumanMessage` containing the prompt text\n- config `{\"configurable\": {\"thread_id\": <thread_id>}}`\n\n#### `extract_text`\nMust 
extract text from the returned `messages`.\n\n#### `extract_structured`\nMust extract `structured_response`.\n\nIf expected fields are missing or malformed, these helpers must raise `AgentError`.\n\n### Logging contract\n\n`invoke_agent` must log:\n\n- start with `thread_id` and optional `agent`\n- completion with elapsed time and context\n- failure with elapsed time and context\n\nThese tests are especially valuable because they lock down observability behavior, not just functional output.\n\n---\n\n## `tests/test_agent_registry.py`\n\nThis file specifies the behavior of `api.agents.registry.AgentRegistry`.\n\n### Verified behavior\n\n- `get(\"missing\")` raises `AgentNotFoundError`\n- `reload(...)` replaces the entire registry contents\n- `build_default_agent_registry()` registers the expected default roles\n\n### Default roles asserted\n\nThe expected default registry order is:\n\n- `planner`\n- `reviewer`\n- `reviser`\n- `coder`\n- `code_reviewer`\n\nThe tests also verify role-specific config details, such as `coder.backend == \"shell\"`.\n\nThis file acts as a guardrail for phase defaults and registry semantics.\n\n---\n\n## Application startup and recovery tests\n\n## `tests/test_app.py`\n\nThis file exercises `api.app.create_app`, startup validation, clone cleanup, and task recovery.\n\n### Local test doubles\n\n#### `MockTextAgent`\nReturns an `AIMessage` with text content.\n\n#### `MockStructuredAgent`\nReturns both:\n\n- `messages`\n- `structured_response` containing `ReviewFeedback`\n\n#### `NoRecoveryTaskService`\nOverrides:\n\n- `recover_running_tasks`\n- `recover_publication_tasks`\n\nto no-op, allowing tests to isolate startup behavior unrelated to recovery.\n\n#### `clean_task_tables`\nAutouse async fixture that deletes from:\n\n- `artifacts`\n- `tasks`\n\nbefore and after each test using the migrated integration database.\n\n#### `_wait_for_task_status`\nPolling helper that repeatedly loads a task via `get_task` until it reaches an expected 
status.\n\n#### `_build_planning_service_factory`\nBuilds a `TaskService` whose planning graph uses injected mock planner/reviewer/reviser agents.\n\n### App registry initialization\n\n`test_create_app_registers_default_agent_and_tool_registries` verifies that `create_app` initializes:\n\n- default agent registry\n- default tool registry\n- empty skill catalog\n\n### Target repo validation\n\nThe `_get_target_repo_issues` tests define startup acceptance rules:\n\nAccepted:\n\n- bare Git repo\n- checked-out repo with `receive.denyCurrentBranch=updateInstead`\n\nRejected:\n\n- non-Git directory\n- nested directory inside a Git repo that is not the repo root\n- checked-out repo without `updateInstead`\n\n### Clone cleanup on startup\n\nThe startup lifecycle must remove orphan clone directories while preserving:\n\n- clones for active tasks\n- recent failed-task clones that should be retained\n\nThese tests verify that clone cleanup is driven by task state, not just filesystem presence.\n\n### Recovery from checkpoints\n\nThe recovery tests are some of the most important integration tests in the suite.\n\nThey verify that after an interrupted planning graph:\n\n- app startup resumes a running task from checkpoint when the clone still exists\n- task eventually reaches `succeeded`\n- publication completes\n- clone cleanup happens after success\n\nThey also verify the failure path:\n\n- if the clone is missing during recovery, startup marks the task `failed`\n- the task error mentions the missing working clone\n\nThese tests connect `create_app`, `TaskService`, checkpoint persistence, graph execution, Git clone lifecycle, and task repository state.\n\n---\n\n## Logging boundary tests\n\n## `tests/test_boundary_logging.py`\n\nThis file verifies that logs expose operational boundaries without leaking sensitive or noisy payloads.\n\n### Shared helpers\n\n#### `MessageCollector`\nA logging handler that appends formatted messages to a list.\n\n#### `_FakeProcess`\nSimulates a 
subprocess with fixed `returncode`, `stdout`, and `stderr`.\n\n#### `_TimeoutProcess`\nSimulates a hanging subprocess on first `communicate()` call and supports `kill()`.\n\n#### `capture_log_messages`\nContext manager that attaches `MessageCollector` to `lib.logger.logger`.\n\n#### `_messages_with_prefix`\nFilters captured messages by prefix.\n\n### Git logging contracts\n\nThe tests for `lib.git.GitClient` verify:\n\n#### `commit_all`\nMust log:\n\n- `Git commit started.`\n- `Git commit created.` on success\n- `Git commit failed.` on failure\n\nIt must include context such as:\n\n- `repo_path`\n- `commit_subject`\n- `has_commit_body`\n\nIt must *not* log the commit body text.\n\n#### `push` / `push_refspec`\nMust log start/completion or start/failure with:\n\n- `repo_path`\n- redacted `remote`\n- `branch_name` or `refspec`\n\nCredentials embedded in HTTPS remotes must be redacted from logs.\n\n### Verification logging contracts\n\nThe tests for `api.verification.VerificationRunner.run` verify boundary logging for:\n\n- successful verification\n- failed verification\n- timeout\n- startup error when command cannot launch\n\nLogs must include structured context such as:\n\n- `code_attempt`\n- `head_commit`\n- `command`\n- `verification_status`\n- `duration_seconds`\n- `exit_code` where applicable\n- `failing_test_count`\n\nLogs must *not* include raw stdout/stderr excerpts.\n\nThis file is effectively the privacy and observability spec for Git and verification boundaries.\n\n---\n\n## Prompt construction and implementation-node tests\n\n## `tests/test_code_reviewer.py`\n\nThis file tests `_build_code_reviewer_prompt` in `api.nodes.code_reviewer`.\n\n### Verified behavior\n\nWhen `ImplementationState.verification` is present, the prompt must include:\n\n- verification evidence section\n- verification status\n- verified commit\n- reviewed HEAD commit\n- command\n- summary\n- output excerpt\n- guidance tying review to the verified commit\n\nWhen verification is absent, 
the prompt must omit the verification evidence section entirely.\n\nThis ensures the code reviewer sees deterministic test evidence only when it exists.\n\n---\n\n## `tests/test_coder.py`\n\nThis file covers both prompt construction and the behavior of `build_coder_node`.\n\n### Local test doubles\n\n#### `MockCoderAgent`\nWrites a file into the clone during `ainvoke`, returns a summary, and records calls.\n\n#### `MockCoderAgentThatCommits`\nWrites a file and also creates a Git commit directly. This is intentionally invalid behavior used to verify enforcement.\n\n### `build_coder_node` behavior\n\nThe tests verify that the coder node:\n\n- runs against a prepared clone created by `build_setup_branch_node`\n- allows the agent to modify files\n- auto-commits dirty changes if the agent did not commit\n- rejects agents that create commits directly\n- increments `code_attempt`\n- passes `thread_id` through invocation config\n- stops with `status == \"max_iterations\"` when attempts are exhausted\n\n### Prompt content\n\n`_build_coder_prompt` must include:\n\n- task description\n- plan content\n- likely starting files from `TaskContext.relevant_files`\n- GitHub issue context when present\n- GitHub PR context when present\n- active human change requests\n- extra context fields\n\nIt must also include verification context for non-passing prior verification results:\n\n- `failed`\n- `error`\n- `timeout`\n\nBut it must omit verification context for:\n\n- first pass with no verification\n- passing verification results\n\nThese tests define the retry loop contract: coder retries should be informed by prior deterministic verification failures, but not cluttered when verification already passed.\n\n---\n\n## Configuration and error tests\n\n## `tests/test_config.py`\n\nThis file specifies `lib.config.Settings` validation and startup issue reporting.\n\n### Verified behavior\n\n- missing required env vars raises `ValidationError`\n- missing both model API keys raises 
`ValidationError`\n- nonexistent `WORKSPACE_ROOT` and `TARGET_REPO` appear in `get_startup_issues()`\n- overlapping `CLONE_ROOT` and `WORKSPACE_ROOT` appears in `get_startup_issues()`\n\nThese tests distinguish between:\n\n- **hard validation failures** during settings construction\n- **startup issues** collected for later reporting\n\nThat distinction matters for app boot behavior.\n\n---\n\n## `tests/test_errors.py`\n\nThis file locks down the exception hierarchy.\n\n### Verified behavior\n\nThe following must inherit from `lib.errors.DarkFactoryError`:\n\n- `TaskNotFoundError`\n- `ArtifactNotFoundError`\n- `InvalidStateTransitionError`\n- `GitOperationError`\n- `ExecutionScopeError`\n- `LLMParseError`\n- `ConfigurationError`\n- `AgentError`\n- `GitHubAPIError`\n- `GitHubRateLimitError`\n- `GitHubNotFoundError`\n\nIt also verifies:\n\n- `GitHubRateLimitError` and `GitHubNotFoundError` inherit from `GitHubAPIError`\n- `DarkFactoryError` preserves `correlation_id` and `details`\n\nThis is important because many higher-level handlers rely on shared base-class semantics.\n\n---\n\n## Execution and scope tests\n\n## `tests/test_execution_github_context.py`\n\nThis file verifies that `api.services.execution.run_task_graph` injects GitHub-derived context into graph input.\n\n### Local test doubles\n\n#### `CapturingGraph`\nCaptures the input passed to `ainvoke` and returns a simple terminal result.\n\n#### `FakeGitHubExecutionClient`\nImplements:\n\n- `get_issue`\n- `list_issue_comments`\n- `get_pull_request`\n- `list_pr_reviews`\n\nwith deterministic payloads.\n\n### Verified behavior\n\nWhen a `TaskRecord` includes GitHub linkage fields, `run_task_graph` must enrich graph input with:\n\n- `github_issue`\n- `github_pr`\n\nincluding nested derived fields such as:\n\n- recent comments\n- human change requests extracted from reviews\n\nThis test is the bridge between persistence-layer task metadata and prompt/context-layer execution input.\n\n---\n\n## 
`tests/test_execution_scope.py`\n\nThis file verifies path safety helpers in `api.nodes.execution_scope`.\n\n### `get_task_clone_path`\nMust resolve a task clone path as a child of `clone_root`.\n\n### `resolve_clone_write_path`\nMust reject parent-directory escape attempts such as:\n\n```text\n../workspace/prompts/hack.md\n```\n\nby raising `ExecutionScopeError`.\n\nThis is a critical sandboxing test: shell-capable agents may write inside their clone, but not outside it.\n\n---\n\n## Git tests\n\n## `tests/test_git_client.py`\n\nThis file exercises real Git operations through `lib.git.GitClient`.\n\n### End-to-end Git lifecycle\n\n`test_git_client_clone_branch_commit_diff_push_and_cleanup` verifies:\n\n- clone from seeded bare repo\n- create branch\n- write file\n- commit changes\n- inspect current branch\n- inspect head commit\n- diff between refs\n- show file content at ref\n- push branch\n- verify pushed ref in origin\n- cleanup clone directory\n\nThis is the core integration test for the Git abstraction.\n\n### Explicit remote publication\n\n`test_git_client_push_refspec_pushes_to_explicit_remote_path` verifies `push_refspec` against a separate bare remote path.\n\n### Auth config helpers\n\nThe tests for `_build_https_push_auth_config` and `_build_git_env` verify:\n\n- GitHub token is converted into an HTTP extraheader\n- non-HTTP remotes are ignored\n- auth is injected via environment/config, not command args\n- existing `GIT_CONFIG_*` entries are preserved and extended\n\nThese tests are especially important for preventing token leakage.\n\n### Orphan clone cleanup\n\n`cleanup_orphan_clones` must:\n\n- remove clones not associated with active tasks\n- preserve active task clones\n- preserve retained failed-task clones when requested\n\n---\n\n## `tests/test_git_tool.py`\n\nThis file specifies the behavior of `api.tools.git.create_git_tool`.\n\n### Verified behavior\n\nThe Git tool must support safe read-only commands such as:\n\n- `show`\n- `diff`\n\nIt 
must also provide helpful guidance for common failure modes.\n\n### Important cases\n\n- invalid revision ranges should return explanatory guidance\n- `git diff` exit code `1` should be treated as normal when differences exist\n- `--quiet` diff with exit code `1` should return an explanatory message\n- invalid `show` revisions should return revision guidance\n- absolute paths after `--` must be rejected\n- parent-relative paths after `--` must be rejected\n\nThese tests define the tool’s UX contract for LLM-facing Git access: safe, read-only, and explanatory rather than cryptic.\n\n---\n\n## GitHub client and comment tests\n\n## `tests/test_github_client.py`\n\nThis file is the transport-level specification for `lib.github.client.AsyncGitHubClient`.\n\n### Pagination and filtering\n\n`list_issues` must:\n\n- follow `Link` pagination\n- filter out pull requests returned by the issues endpoint\n- send the expected `Authorization` header\n\n### Retry behavior\n\nThe client must retry:\n\n- rate limits using `Retry-After`\n- transient server errors using exponential backoff\n\nThe tests inject a fake `sleep_func` to verify exact delays.\n\n### Error mapping\n\nHTTP responses must map to typed exceptions:\n\n- `404` → `GitHubNotFoundError`\n- `429` → `GitHubRateLimitError`\n- validation failures → `GitHubAPIError` with structured details\n\n### Mutation endpoints\n\nThe tests verify payloads and methods for:\n\n- `create_pull_request`\n- `merge_pull_request`\n- `add_labels`\n- `post_comment`\n- `edit_comment`\n\n### Smoke test\n\n`test_async_github_client_smoke_fetch_comment_post_flow` is an opt-in live integration test gated by environment variables:\n\n- `GH_TOKEN`\n- `GH_REPO`\n- `GH_SMOKE_TEST_ISSUE_NUMBER`\n\nIf not configured, it skips.\n\nThis is useful for validating real GitHub connectivity without making the whole suite depend on external services.\n\n---\n\n## `tests/test_github_comments.py`\n\nAlthough the source excerpt is truncated, the imports and setup 
show that this file tests:\n\n- `DarkFactoryCommentMarker`\n- `format_task_comment`\n- `parse_comment_marker`\n- `upsert_task_comment`\n\nusing a fake GitHub comment client.\n\nThis area likely defines idempotent issue-comment behavior for task status publication, including marker parsing and update-vs-create logic.\n\nBecause publication flows depend on comment upserts being stable and repeatable, these tests are part of the GitHub publication contract.\n\n---\n\n## Additional tests referenced by the call graph\n\nThe provided call graph references several additional test modules not fully shown in the source excerpt. They are still part of the `tests/` module’s behavior surface and worth understanding when contributing.\n\n### API and task execution tests\nReferenced examples include:\n\n- `tests/test_task_api.py`\n- `tests/test_implementation_task_api.py`\n\nThese appear to verify HTTP task creation/execution flows, background execution, checkpointed graph boundaries, and GitHub linkage fields.\n\n### Graph tests\nReferenced examples include:\n\n- `tests/test_planning_graph.py`\n- `tests/test_implementation_graph.py`\n\nThese likely verify graph routing, review/revision loops, provenance persistence, verification routing, and max-attempt termination.\n\n### Repository context discovery tests\nReferenced examples include:\n\n- `tests/test_repository_context_discovery.py`\n\nThese appear to verify trusted snapshot ordering, frontmatter stripping, ANSI/control-character cleanup, and total-budget enforcement.\n\n### Verification tests\nReferenced examples include:\n\n- `tests/test_verification_runner.py`\n- `tests/test_verification_models.py`\n\nThese validate `VerificationConfig`, `VerificationResult`, output truncation, timeout handling, startup errors, and classification of exit codes.\n\n### Observability and publication tests\nReferenced examples include:\n\n- `tests/test_observability_helpers.py`\n- `tests/test_github_publisher.py`\n\nThese likely verify 
artifact logging and publication substep logging, including blocked review and comment-upsert failure handling.\n\nEven without the full source here, the call graph shows these tests are part of the same pattern: they specify boundary behavior and integration contracts rather than only internal implementation details.\n\n## Common testing patterns in this module\n\n## 1. Monkeypatching boundary constructors\n\nMany tests patch constructors or factory functions such as:\n\n- `api.agents.factory.create_deep_agent`\n- `api.agents.factory.get_settings`\n- `asyncio.create_subprocess_exec`\n\nThis keeps tests focused on argument construction and boundary behavior.\n\n## 2. Fake agents with `ainvoke`\n\nAgent-facing tests consistently use lightweight doubles implementing:\n\n```python\nasync def ainvoke(input, config=None) -> dict[str, object]:\n    ...\n```\n\nThat mirrors the production contract and makes invocation tests realistic.\n\n## 3. Real Git, fake network\n\nGit tests often use real repositories and subprocess-backed Git behavior, while GitHub tests use `httpx.MockTransport` or fake clients. This is a good tradeoff:\n\n- Git is cheap and deterministic locally\n- network APIs are better isolated behind transport mocks\n\n## 4. Log capture as a first-class assertion surface\n\nSeveral tests attach a custom logging handler and assert exact message content. This means log format and redaction behavior are part of the supported contract.\n\n## 5. 
Integration tests are explicitly gated\n\nDatabase-backed and external-service tests are either:\n\n- skipped when required env vars are missing, or\n- marked with `@pytest.mark.integration`\n\nThis keeps the default test experience manageable while preserving high-value end-to-end coverage.\n\n## How this module connects to the rest of the codebase\n\nThe `tests/` module touches nearly every major subsystem:\n\n- `lib.config.Settings`\n- `api.app.create_app`\n- `api.agents.*`\n- `api.nodes.*`\n- `api.services.*`\n- `api.repositories.*`\n- `api.tools.*`\n- `lib.git.*`\n- `lib.github.*`\n- `api.verification.*`\n\nIn practice, the tests define the expected seams between these layers:\n\n- settings → startup validation\n- app startup → task recovery and clone cleanup\n- task records → graph input\n- graph nodes → prompt construction and retries\n- Git/GitHub clients → safe transport and logging\n- verification → deterministic evidence and bounded logs\n\n## Contributing guidance\n\nWhen adding or changing behavior in this area, keep these principles in mind.\n\n### Preserve contract-level assertions\nIf a test asserts exact log wording, exact exception type, or exact prompt section presence, that behavior is probably intentional and externally meaningful.\n\n### Prefer focused fakes over broad mocks\nMost tests use small doubles with realistic method signatures. 
Follow that pattern instead of introducing heavy mocking frameworks or patching deep internals unnecessarily.\n\n### Keep integration fixtures isolated\nIf you add database-backed tests, prefer existing fixtures:\n\n- `migrated_test_database_url`\n- `db_connection`\n\nIf you add Git-backed tests, prefer `seeded_git_repo` unless you specifically need a malformed repo.\n\n### Be careful with environment leakage\nTests that depend on settings should usually use `clean_runtime_env` and explicit `monkeypatch.setenv(...)`.\n\n### Treat logs as part of the API\nBoundary logging tests intentionally prevent leakage of:\n\n- commit bodies\n- subprocess output\n- embedded credentials\n\nAny change to logging should be reviewed for both observability and secrecy.\n\n## Practical map of the suite\n\nIf you need to work on a specific subsystem, start here:\n\n- **Agent creation / prompts / middleware**: `test_agent_factory.py`\n- **Agent registry defaults**: `test_agent_registry.py`\n- **Read-only backend enforcement**: `test_agent_backends.py`\n- **App startup / recovery**: `test_app.py`\n- **Git client behavior**: `test_git_client.py`\n- **Git tool behavior**: `test_git_tool.py`\n- **GitHub transport behavior**: `test_github_client.py`\n- **Boundary logging / privacy (Git and verification)**: `test_boundary_logging.py`\n- **Coder node behavior**: `test_coder.py`\n- **Code reviewer prompt behavior**: `test_code_reviewer.py`\n- **Settings validation**: `test_config.py`\n- **Error hierarchy**: `test_errors.py`\n- **Execution sandboxing**: `test_execution_scope.py`\n- **GitHub context injection into execution**: `test_execution_github_context.py`\n\n## Summary\n\nThe `tests/` module is a contract suite for DarkFactory’s operational boundaries:\n\n- safe agent capabilities\n- deterministic prompt assembly\n- restart-safe task execution\n- isolated integration infrastructure\n- secure Git/GitHub behavior\n- non-leaky observability\n\nWhen changing production code, these tests tell you not only whether 
behavior still works, but whether it still works in the way the rest of the system expects.","other-workspace":"# Other — workspace\n\n# workspace/HEARTBEAT.example.md\n\n## Purpose\n\n`workspace/HEARTBEAT.example.md` is a template document for tracking recurring operational work in the DarkFactory workspace.\n\nIt is not executable code and has no runtime behavior. Instead, it serves as a lightweight coordination artifact: a place to list periodic tasks that should happen on a schedule, such as daily reviews, weekly cleanup, or routine maintenance checks.\n\nThis file is useful when the project needs a human-readable checklist of ongoing responsibilities that are easy to forget because they are not tied to a specific feature or bug.\n\n## What the file contains\n\nThe file currently defines:\n\n- A top-level heading: `# HEARTBEAT`\n- A short instruction:\n  - `List recurring or periodic tasks for DarkFactory here.`\n- A few example entries showing the intended format:\n  - `Every weekday morning, review open issues and summarize blockers.`\n  - `Once per day, look for stale PRs that need review.`\n  - `Once per week, suggest cleanup tasks for old runtime artifacts.`\n\nThese examples communicate the expected style:\n\n- plain Markdown\n- simple bullet list entries\n- natural-language schedules\n- task descriptions focused on recurring project maintenance\n\n## Role in the workspace\n\nThis module belongs to the `workspace` area and functions as documentation rather than application logic.\n\nBecause it is a workspace-level artifact:\n\n- it does not expose functions or classes\n- it does not participate in the call graph\n- it does not depend on other modules\n- no other modules invoke it directly\n\nIts connection to the rest of the codebase is organizational rather than technical. 
It supports project operations around the codebase, not execution within it.\n\n## How it works\n\nThe file works as a manually maintained checklist template.\n\nTypical usage is:\n\n1. Copy or rename `HEARTBEAT.example.md` into an active workspace document if your workflow uses a concrete `HEARTBEAT.md`.\n2. Replace the example bullets with real recurring tasks relevant to the project.\n3. Keep entries concise and schedule-oriented.\n4. Update the list as team responsibilities or maintenance needs change.\n\nThere is no parser, schema, or automation implied by this file in the current module. The content is free-form Markdown intended for humans.\n\n## Suggested content style\n\nThe examples imply a simple pattern for each task entry:\n\n- **cadence** — when the task should happen\n- **action** — what should be done\n- **scope or outcome** — what area to review or what result to produce\n\nExample pattern:\n\n- `Every <time period>, <perform action> for <target/outcome>.`\n\nExamples:\n\n- `Every weekday morning, review open issues and summarize blockers.`\n- `Once per week, suggest cleanup tasks for old runtime artifacts.`\n\nThis style keeps the document readable and easy to scan.\n\n## Key characteristics\n\n### Non-executable\n\nThis file is documentation only. There are no functions, classes, scripts, or configuration structures defined here.\n\n### Template-oriented\n\nThe `.example.md` suffix indicates that this is a sample or starter file, not necessarily the canonical live checklist.\n\n### Workspace-scoped\n\nThe file is intended for project maintenance and team workflow inside the workspace, not for end-user documentation or runtime configuration.\n\n### Low coupling\n\nSince there are no internal or outgoing calls and no execution flows, changes to this file are operationally safe from a runtime perspective. 
The main impact of edits is on team process and clarity.\n\n## Relationship to the rest of the codebase\n\nAlthough this module has no technical integration points, it can indirectly support several parts of the project:\n\n- **Issue management** by reminding contributors to review open issues\n- **Pull request hygiene** by prompting stale PR checks\n- **Artifact cleanup** by encouraging periodic maintenance of generated files or runtime leftovers\n- **Team coordination** by making recurring responsibilities visible\n\nIn other words, it helps maintain the health of the repository and development workflow even though it does not participate in application execution.\n\n## Maintenance guidance\n\nWhen updating this file:\n\n- prefer actionable tasks over vague reminders\n- include a clear cadence such as “daily”, “weekly”, or “every weekday morning”\n- keep each bullet focused on one recurring responsibility\n- remove outdated tasks when workflows change\n- avoid turning it into a backlog of one-time work items\n\nGood entries are recurring, specific, and easy to verify.\n\n### Good examples\n\n- `Every Monday, review dependency update PRs and flag risky upgrades.`\n- `Once per week, inspect temporary runtime artifacts and remove obsolete files.`\n- `Every weekday afternoon, summarize unresolved blockers from active issues.`\n\n### Less useful examples\n\n- `Fix bugs`\n- `Improve performance someday`\n- `Look at the repo`\n\nThese lack cadence, scope, or a concrete action.\n\n## Contribution notes\n\nIf contributors add or revise tasks in `workspace/HEARTBEAT.example.md`, they should treat it as a shared operational checklist:\n\n- write entries so another developer can understand them immediately\n- keep formatting consistent with the existing bullet-list style\n- prefer stable recurring responsibilities over temporary campaign work\n- align tasks with actual team habits and repository needs\n\n## Summary\n\n`workspace/HEARTBEAT.example.md` is a simple Markdown 
template for documenting recurring maintenance and coordination tasks in the DarkFactory workspace. It has no runtime behavior, no API surface, and no code-level dependencies. Its value comes from making periodic project responsibilities explicit, visible, and easy to maintain.","other":"# Other\n\n# Other\n\nThe **Other** module groups the repository’s non-feature infrastructure: contributor guidance, build and runtime packaging, database migration scaffolding, architectural planning documents, workspace conventions, and the test suite that validates all of it.\n\nThese pieces do not form a single runtime subsystem. Instead, they define the environment and operating rules around the application:\n\n- how contributors and agents should work in the repo via [AGENTS.md](agents.md) and [CLAUDE.md](claude.md)\n- how the project is packaged and installed via [pyproject.toml](pyproject-toml.md)\n- how the API is containerized and run locally via [Dockerfile](dockerfile.md) and [docker-compose.yml](docker-compose-yml.md)\n- how schema changes are configured and generated via [alembic.ini](alembic-ini.md) and [alembic](alembic.md)\n- how intended system behavior is documented before implementation in [plans](plans.md)\n- how durable workspace context and agent behavior assets are organized in [workspace](workspace.md), [memory](memory.md), [prompts](prompts.md), and [skills](skills.md)\n- how repository-wide behavior is enforced in practice by [tests](tests.md)\n\n## How the sub-modules work together\n\n### Repository operating model\n\n[AGENTS.md](agents.md), [CLAUDE.md](claude.md), and [plans](plans.md) define the project’s human and agent-facing contract.\n\n- **AGENTS.md** provides the architectural map and contribution boundaries.\n- **CLAUDE.md** turns that into a required graph-aware workflow for exploration, refactoring, and safe edits.\n- **plans** captures the intended product and system invariants that implementation code is expected to preserve.\n\nTogether, 
these documents establish a top-down workflow: understand the architecture, confirm the governing plan, then make changes using the prescribed tooling and safety checks.\n\n### Build, run, and migrate\n\n[pyproject.toml](pyproject-toml.md), [Dockerfile](dockerfile.md), [docker-compose.yml](docker-compose-yml.md), [alembic.ini](alembic-ini.md), and [alembic](alembic.md) define the project’s executable environment.\n\n- **pyproject.toml** is the canonical source for dependencies, packaging, and test configuration.\n- **Dockerfile** builds the application image that runs `python -m api serve`.\n- **docker-compose.yml** combines that application container with PostgreSQL for local development and testing.\n- **alembic.ini** tells Alembic how to locate and run migrations.\n- **alembic/script.py.mako** standardizes the shape of newly generated migration files.\n\nThis creates a consistent lifecycle: install dependencies, run the app in a containerized environment, and evolve the database schema through Alembic-managed revisions.\n\n### Workspace-driven agent behavior\n\n[prompts](prompts.md), [skills](skills.md), [memory](memory.md), and [workspace](workspace.md) provide mutable operational inputs for agent-assisted work.\n\n- **prompts** defines role-specific instruction assets such as `planner`, `reviewer`, `reviser`, `coder`, and `code_reviewer`.\n- **skills** defines the reusable Markdown-based skill format those workflows can draw on.\n- **memory** stores durable context such as operational history and persistent facts.\n- **workspace/HEARTBEAT.example.md** provides a template for recurring operational tasks.\n\nThese files are not code, but they shape how planning, implementation, review, and ongoing maintenance are carried out across sessions.\n\n### Verification and executable contracts\n\n[tests](tests.md) is where the repository’s policies and infrastructure are exercised as executable behavior. 
The suite spans startup, planning, implementation, publication, verification, observability, and persistence-related flows.\n\nThe visible cross-module test references show that the suite is not isolated to unit helpers; it validates end-to-end workflows using fakes such as `MockStructuredAgent`, `MockTextAgent`, `CoderAction`, `FakeGitHubReviewClient`, `FakeGitHubPublicationClient`, and `GraphRecorder`. It also covers recovery and publication scenarios, including follow-up task creation, blocked review reconciliation, verification reruns, artifact retrieval, and startup recovery.\n\n## Key workflows spanning this module group\n\n### 1. Safe change workflow\n\nA typical repository change starts from the guidance documents:\n\n1. consult [AGENTS.md](agents.md) and [CLAUDE.md](claude.md) for navigation and modification rules\n2. confirm intended behavior in [plans](plans.md)\n3. update code and supporting assets governed by [pyproject.toml](pyproject-toml.md), migration config, or workspace files as needed\n4. validate behavior through [tests](tests.md)\n\nThis is the main bridge between architectural intent, contributor process, and executable verification.\n\n### 2. Local development and runtime setup\n\nFor local execution:\n\n1. dependency and packaging rules come from [pyproject.toml](pyproject-toml.md)\n2. the application image is built by [Dockerfile](dockerfile.md)\n3. the full local stack is orchestrated by [docker-compose.yml](docker-compose-yml.md)\n4. schema migration behavior is controlled by [alembic.ini](alembic-ini.md) and [alembic](alembic.md)\n\nThis workflow ensures the API and PostgreSQL run in a reproducible environment aligned with the project’s packaging and migration setup.\n\n### 3. Agent-assisted planning and implementation\n\nThe repository’s agent workflows are shaped by:\n\n1. architectural constraints in [plans](plans.md)\n2. operating rules in [AGENTS.md](agents.md) and [CLAUDE.md](claude.md)\n3. 
role instructions in [prompts](prompts.md)\n4. reusable guidance in [skills](skills.md)\n5. durable context in [memory](memory.md)\n6. behavioral enforcement in [tests](tests.md)\n\nThis is how non-code assets influence actual planning, review, coding, and revision behavior across the system.\n\n## Sub-modules\n\n- [AGENTS.md](agents.md)\n- [CLAUDE.md](claude.md)\n- [Dockerfile](dockerfile.md)\n- [alembic.ini](alembic-ini.md)\n- [alembic](alembic.md)\n- [docker-compose.yml](docker-compose-yml.md)\n- [plans](plans.md)\n- [pyproject.toml](pyproject-toml.md)\n- [tests](tests.md)\n- [workspace](workspace.md)\n- [memory](memory.md)\n- [prompts](prompts.md)\n- [skills](skills.md)","overview":"# darkfactory — Wiki\n\n# darkfactory\n\nDarkFactory is a runtime bootstrap for task-driven software work. It exposes a FastAPI service that creates, resumes, executes, verifies, and publishes repository-scoped tasks using agent workflows, Git workspaces, PostgreSQL persistence, and GitHub integration.\n\nIf you are new to the codebase, the shortest useful mental model is:\n\n1. an HTTP request hits the API,\n2. a task service loads or updates task state,\n3. a workflow graph runs task nodes against a Git workspace,\n4. agents use built-in tools and discovered skills to do work,\n5. 
results are verified, persisted, and optionally published to GitHub.\n\nThis wiki starts from that runtime path and then branches into the subsystem pages linked throughout.\n\n## Big picture\n\nDarkFactory is organized around a small set of boundaries:\n\n- the API process and routes in [Application Bootstrap and API Surface](application-bootstrap-and-api-surface.md)\n- task lifecycle and workflow execution in [Task Orchestration and Workflow Execution](task-orchestration-and-workflow-execution.md)\n- agent construction, tool adaptation, and nested invocation in [Agent Runtime and Tooling Integration](agent-runtime-and-tooling-integration.md)\n- repository-defined extensions in [Skills and Extensible Capabilities](skills-and-extensible-capabilities.md)\n- GitHub publication and comment reconciliation in [GitHub Integration and Publication](github-integration-and-publication.md)\n- repository context discovery and governed memory in [Repository Context and Memory Management](repository-context-and-memory-management.md)\n- PostgreSQL access and migrations in [Persistence and Database Layer](persistence-and-database-layer.md)\n- Git workspace operations in [Git Operations and Workspace Execution](git-operations-and-workspace-execution.md)\n- shared contracts in [Domain Models and Error Contracts](domain-models-and-error-contracts.md)\n- process-wide settings and logging in [Configuration and Logging Infrastructure](configuration-and-logging-infrastructure.md)\n\nAt runtime, most interesting behavior flows through `api/services/tasks.py`, `api/graphs/*`, `api/nodes/*`, `lib/git/*`, and `api/github/*`.\n\n## Architecture in 10 seconds\n\n```mermaid\nflowchart LR\n    A[FastAPI API<br/>api/app.py + api/routes/tasks.py]\n    B[Task Services<br/>api/services/tasks.py]\n    C[Workflow Graphs<br/>api.graphs]\n    D[Workflow Nodes<br/>api.nodes]\n    E[Agent Runtime<br/>api.agents]\n    F[Skills<br/>api.skills]\n    G[Git Workspaces<br/>lib.git]\n    H[GitHub 
Publication<br/>api.github + lib.github]\n    I[Persistence<br/>api.repositories + migrations]\n    J[Shared Contracts & Infra<br/>api.models/api.errors + lib.config/lib.logger]\n\n    A --> B\n    A --> I\n    B --> C\n    B --> D\n    B --> I\n    B --> H\n    B --> G\n    C --> D\n    D --> E\n    D --> G\n    D --> H\n    E --> F\n    B --> J\n    D --> J\n    I --> J\n```\n\n## What this service actually does\n\nDarkFactory is not just an API wrapper around an LLM. It is a controlled execution environment for repository work.\n\nA task typically has:\n\n- persisted task metadata and status\n- a repository/workspace context\n- an execution plan or branch state\n- one or more workflow steps run through LangGraph-style orchestration\n- artifacts, logs, and publication state\n- optional GitHub side effects such as PR creation or task comments\n\nThe service is designed so that higher-level orchestration code does not directly manipulate SQL, shell out to Git ad hoc, or talk to GitHub without going through the relevant boundary modules. That separation is one of the main things to preserve when changing the system.\n\n## Core runtime path\n\nThe most important end-to-end flow starts in the task routes under `api/routes/tasks.py`.\n\nA request enters through the FastAPI app assembled in `api/app.py`. 
Route handlers delegate quickly into `api/services/tasks.py`, where task lifecycle logic lives: loading tasks, resolving plans, recovering publication state, coordinating workflow execution, and shaping responses.\n\nFrom there, the service layer fans out into the rest of the system:\n\n- it uses [Persistence and Database Layer](persistence-and-database-layer.md) to read and write task and artifact state\n- it invokes [Task Orchestration and Workflow Execution](task-orchestration-and-workflow-execution.md) graphs and nodes to perform actual work\n- those nodes use [Git Operations and Workspace Execution](git-operations-and-workspace-execution.md) to inspect branches, clones, worktrees, diffs, commits, and pushes\n- nodes also call into [Agent Runtime and Tooling Integration](agent-runtime-and-tooling-integration.md), where role-based agents are built and tools are adapted\n- the agent layer can expose repository-defined capabilities from [Skills and Extensible Capabilities](skills-and-extensible-capabilities.md)\n- publication and reconciliation go through [GitHub Integration and Publication](github-integration-and-publication.md)\n- shared validation and failure semantics come from [Domain Models and Error Contracts](domain-models-and-error-contracts.md)\n\nThat means the service layer is the coordinator, but the real execution work is distributed across specialized modules.\n\n## A concrete request flow\n\nOne representative read path is “get task”.\n\nAt a high level:\n\n- `api/routes/tasks.py` receives the request\n- `api/services/tasks.py` builds the response and resolves the task plan\n- if the plan must be read from Git state, the service calls `lib/git/client.py`\n- the Git client prepares a controlled environment, including HTTPS auth configuration when needed\n- the result is converted back into API/domain models and returned\n\nThere is also an important safety path here: when task state is resolved from a local clone, execution-scope checks in 
`api/nodes/execution_scope.py` enforce that file access stays within the allowed workspace root. Violations surface as domain-level errors rather than leaking raw filesystem behavior.\n\nThis is a good example of the project’s design style: route → service → boundary module → typed result/error.\n\n## Execution flow for task work\n\nThe write path is more interesting.\n\nWhen a task is executed or resumed, `api/services/tasks.py` coordinates a workflow graph from `api.graphs`. The graph selects and sequences nodes from `api.nodes`, which perform bounded units of work such as reading repository state, invoking agents, updating memory, verifying outputs, or preparing publication.\n\nThose nodes are where most subsystem integration happens:\n\n- Git-backed repository operations come from `lib.git`\n- agent calls and tool wiring come from the agent runtime\n- repository instructions and memory files come from the context/memory module\n- verification logic checks whether the resulting workspace state is acceptable\n- publication nodes can hand off to GitHub logic\n\nIf you want to understand “how work actually gets done,” start with [Task Orchestration and Workflow Execution](task-orchestration-and-workflow-execution.md), then follow links into `api.nodes`, `api.graphs`, and `api.verification`.\n\n## Agents, tools, and skills\n\nThe agent subsystem is intentionally separated from orchestration. [Agent Runtime and Tooling Integration](agent-runtime-and-tooling-integration.md) defines built-in roles, runtime restrictions, backend resolution, and the adapter layer that turns DarkFactory-owned tools into LangChain-compatible tools.\n\nThat separation matters because workflow nodes should ask for an agent capability, not manually assemble model clients or tool lists.\n\nOn top of that, [Skills and Extensible Capabilities](skills-and-extensible-capabilities.md) lets repositories extend the tool surface through `skills/*/SKILL.md`. 
Skills are discovered from the workspace, validated, checked for runtime availability, and then exposed through the shared tool registry. In practice, this is how repository-local behavior becomes available to the agent layer without hardcoding everything into the service.\n\n## Repository context and memory\n\nDarkFactory distinguishes between two kinds of repository knowledge:\n\n- trusted, bounded repository context discovered from Git snapshots\n- mutable workspace-local memory managed under `memory/`\n\nThe first is handled by [Repository Context and Memory Management](repository-context-and-memory-management.md) through context discovery code and typed repository-context models. For the second, the same module governs `MEMORY.md` and append-only `HISTORY.md` with explicit write controls and size budgets.\n\nThis module is important because it prevents prompt context from becoming an unbounded scrape of the repository while still allowing durable task-local memory.\n\n## Git and GitHub are separate on purpose\n\nThe codebase draws a clean line between local repository manipulation and remote platform publication.\n\n[Git Operations and Workspace Execution](git-operations-and-workspace-execution.md) is the async interface to the Git CLI. It handles clones, branches, worktrees, diffs, commits, pushes, and repository inspection. It is intentionally thin and operational.\n\n[GitHub Integration and Publication](github-integration-and-publication.md) sits above that. It owns REST API interactions, compact issue/PR context extraction, publication state reconciliation, PR creation, and idempotent task comments. It uses Git where necessary, but it is not the place to add generic repository operations.\n\nIf you are deciding where new code belongs, this distinction is usually the first architectural check to make.\n\n## Persistence, contracts, and infrastructure\n\nThe persistence layer is split between runtime repositories and migration tooling. 
[Persistence and Database Layer](persistence-and-database-layer.md) provides connection pooling, startup schema checks, and async repository functions such as task/artifact access. Higher layers should not construct SQL directly.\n\nAcross all of these modules, [Domain Models and Error Contracts](domain-models-and-error-contracts.md) provides the typed shapes that move through the system: tasks, artifacts, agents, reviews, repository context, and verification results, along with stable exception types.\n\nFinally, [Configuration and Logging Infrastructure](configuration-and-logging-infrastructure.md) defines how the process starts correctly and how it reports what it is doing. `lib.config` centralizes environment-driven settings; `lib.logger` provides shared logging used heavily by services, nodes, GitHub publication, and Git operations.\n\n## Where to start reading\n\nA good first pass through the codebase is:\n\n1. `api/__main__.py` and `api/app.py` to see how the process starts\n2. `api/routes/tasks.py` to see the public API surface\n3. `api/services/tasks.py` to understand orchestration at the service boundary\n4. `api/graphs/*` and `api/nodes/*` to see execution behavior\n5. `lib/git/client.py` and `api/github/publisher.py` for the two major side-effect layers\n6. the linked wiki pages for the subsystem you plan to change\n\nIf you are debugging a request, follow the route into the service layer first. 
If you are debugging task execution, start from the service method and then trace into graphs and nodes.\n\n## Local setup\n\nDarkFactory is a Python 3.12+ project built with setuptools.\n\nBasic setup:\n\n```bash\npython3.12 -m venv .venv\nsource .venv/bin/activate\npip install -U pip\npip install -e .\n```\n\nThe project depends on FastAPI, Uvicorn, async PostgreSQL drivers, LangGraph, DeepAgents, and LangChain OpenAI integrations, so a working local environment usually also needs:\n\n- PostgreSQL available for runtime persistence\n- environment variables required by `lib.config`\n- any model/provider credentials needed by the configured agent backend\n- Git available on the host system\n- GitHub credentials if you are testing publication flows\n\nTo run the API locally, start from the entrypoint in `api/__main__.py`. In practice this is typically done either by invoking the package module or by running Uvicorn against the FastAPI app, depending on how your local environment is configured.\n\nBefore going deeper, read the contributor/runtime guidance in the repository’s supporting docs referenced from [Other](other.md), especially `AGENTS.md` if you are working on agent-facing behavior.\n\n## Design principles to keep in mind\n\nWhen modifying DarkFactory, these patterns show up repeatedly:\n\n- keep route handlers thin\n- put lifecycle coordination in services\n- keep workflow behavior in graphs and nodes\n- use typed models and domain errors at boundaries\n- route all database access through repositories\n- route all Git operations through `lib.git`\n- keep GitHub-specific behavior in the GitHub modules\n- treat repository context and memory as governed inputs, not arbitrary file access\n- prefer extending tools/skills over embedding ad hoc agent behavior in orchestration code\n\nFollowing those boundaries will make your changes fit naturally with the rest of the system.\n\n## Next steps\n\nFrom here, most developers should continue with one of these 
pages:\n\n- [Application Bootstrap and API Surface](application-bootstrap-and-api-surface.md) if you want to understand startup, lifecycle, or HTTP endpoints\n- [Task Orchestration and Workflow Execution](task-orchestration-and-workflow-execution.md) if you want to understand how tasks actually run\n- [Agent Runtime and Tooling Integration](agent-runtime-and-tooling-integration.md) if you are changing agent behavior or tools\n- [GitHub Integration and Publication](github-integration-and-publication.md) if you are working on PRs, comments, or publication state\n- [Persistence and Database Layer](persistence-and-database-layer.md) if you are changing stored task/artifact behavior","persistence-and-database-layer":"# Persistence and Database Layer\n\n# Persistence and Database Layer\n\nThis module owns two related concerns:\n\n- **Schema management** with Alembic\n- **Runtime persistence access** through async repository functions built on `asyncpg`\n\nIt is the boundary between application services/routes and PostgreSQL. 
Higher-level code does not construct SQL directly; it calls repository functions in `api/repositories/tasks.py` and `api/repositories/artifacts.py`.\n\n## Responsibilities\n\nThe persistence layer provides:\n\n- database connection pooling via `create_pool()` / `close_pool()`\n- startup schema validation via `validate_database_schema()`\n- Alembic configuration and migration execution support\n- CRUD operations for:\n  - `tasks`\n  - `artifacts`\n- a small set of domain-specific lookup queries used by task orchestration and publication recovery flows\n\n## Architecture\n\n```mermaid\nflowchart TD\n    A[Routes / Services] --> B[tasks.py repository]\n    A --> C[artifacts.py repository]\n    B --> D[(PostgreSQL)]\n    C --> D\n    E[database.py] --> D\n    F[Alembic migrations] --> D\n```\n\n## Runtime database access\n\n## `api/repositories/database.py`\n\nThis file contains the shared database utilities used by the application at startup and shutdown.\n\n### `ALEMBIC_INI_PATH`\n\n`ALEMBIC_INI_PATH` resolves the repository-local `alembic.ini` file:\n\n```python\nALEMBIC_INI_PATH = Path(__file__).resolve().parents[2] / \"alembic.ini\"\n```\n\nThis keeps Alembic configuration anchored to the codebase rather than the current working directory.\n\n### `get_alembic_config(database_url: str | None = None) -> Config`\n\nBuilds an Alembic `Config` object for this repository.\n\nBehavior:\n\n- loads `alembic.ini`\n- optionally overrides `sqlalchemy.url` when `database_url` is provided\n\nThis is used internally by `get_expected_schema_revisions()` and can also support tooling that needs a programmatic Alembic config.\n\n### `get_expected_schema_revisions() -> set[str]`\n\nReturns the current Alembic head revision(s) from the migration directory:\n\n```python\nscript_directory = ScriptDirectory.from_config(get_alembic_config())\nreturn set(script_directory.get_heads())\n```\n\nThis is the source of truth for what schema revision the running code expects.\n\n### 
`create_pool(database_url: str) -> asyncpg.Pool`\n\nCreates the shared `asyncpg` pool used by repository functions.\n\nPool settings are currently fixed:\n\n- `min_size=1`\n- `max_size=10`\n- `command_timeout=60`\n\nThis function does not perform schema validation; that is handled separately by `validate_database_schema()`.\n\n### `close_pool(pool: asyncpg.Pool | None) -> None`\n\nCloses the pool if one was created. Passing `None` is allowed and becomes a no-op, which simplifies shutdown code.\n\n### `validate_database_schema(database_url: str) -> None`\n\nChecks that the connected database is at the exact Alembic revision expected by the codebase.\n\nFlow:\n\n1. compute expected revision heads with `get_expected_schema_revisions()`\n2. connect directly with `asyncpg.connect(database_url)`\n3. query `SELECT version_num FROM alembic_version`\n4. compare the database revisions to the repository heads\n5. raise `ConfigurationError` if:\n   - the `alembic_version` table is missing\n   - the revision set does not match\n\nImportant details:\n\n- missing `alembic_version` is translated from `asyncpg.UndefinedTableError` into a developer-facing `ConfigurationError`\n- the connection is always closed in a `finally` block\n- the comparison is set-based, so it supports multiple heads if the migration graph ever branches\n\nThis function is the guardrail that prevents the app from running against a schema that is unmigrated, outdated, or otherwise out of sync with the repository's migration heads.\n\n## Repository conventions\n\nBoth repository modules follow the same design patterns.\n\n### Executor abstraction\n\nEach repository defines:\n\n```python\nDatabaseExecutor = asyncpg.Connection | asyncpg.Pool\n```\n\nFunctions accept either a single connection or a pool. 
This makes them usable in:\n\n- normal request handling with a shared pool\n- tests using a dedicated connection\n- transactional flows where a caller passes an acquired connection\n\nThe code relies on the overlapping `asyncpg` API surface: `fetchrow()`, `fetch()`, and `execute()`.\n\n### Record conversion\n\nRows returned by `asyncpg` are converted into model objects through helper functions:\n\n- `_to_task_record()`\n- `_to_artifact_record()`\n\nThese helpers:\n\n1. convert `asyncpg.Record` to a plain `dict`\n2. decode JSON fields if needed\n3. validate the payload with the corresponding Pydantic model:\n   - `TaskRecord.model_validate(...)`\n   - `ArtifactRecord.model_validate(...)`\n\nThis keeps SQL-facing code thin while ensuring repository outputs are typed and validated.\n\n### JSON handling\n\nJSON/JSONB fields are explicitly encoded and decoded:\n\n- `_encode_json(value)` uses `json.dumps(...)`\n- `_decode_json(value)` uses `json.loads(...)` only when the value is a `str`\n\nThis matters because `asyncpg` may return JSONB values in driver-specific forms. 
The helpers normalize repository behavior before model validation.\n\nCurrent JSON-backed fields:\n\n- `tasks.context`\n- `artifacts.metadata`\n\n### Partial updates\n\n`update_task()` and `update_artifact()` both:\n\n- call `model_dump(exclude_unset=True)` on the update model\n- build SQL assignments dynamically from an allowlist of fields\n- preserve omitted fields\n- return the updated row\n\nThis pattern avoids accidental overwrites and keeps update behavior aligned with PATCH-style semantics.\n\nIf the update model contains no fields, both functions return the current stored record instead of issuing an empty `UPDATE`.\n\n## Task persistence\n\n## `api/repositories/tasks.py`\n\nThis module contains all SQL for the `tasks` table.\n\n### Data shape\n\n`TASK_COLUMNS` defines the canonical projection used by most queries:\n\n- identifiers: `task_id`, `thread_id`\n- task definition: `type`, `description`, `context`\n- execution state: `status`, `result_type`, `result`, `error`\n- branch/planning: `branch_name`, `plan_path`\n- publication state: `publication_status`, `published_at`, `publication_error`\n- GitHub linkage: `github_repo`, `github_issue_number`, `github_pr_number`\n- task lineage: `origin_task_id`, `follow_up_to_task_id`, `follow_up_trigger_key`\n- timestamps: `created_at`, `updated_at`\n\nUsing a shared column list keeps returned shapes consistent across queries.\n\n### `create_task(executor, task: TaskCreate) -> TaskRecord`\n\nInserts a new task row and returns the stored record.\n\nNotable behavior:\n\n- `context` is passed as `$5::jsonb` after `_encode_json(task.context)`\n- all persisted fields are explicitly listed in the `INSERT`\n- the inserted row is returned with `RETURNING {TASK_COLUMNS}`\n\nThis function is used broadly by tests and task creation flows, including follow-up task creation and publication recovery scenarios.\n\n### `get_task(executor, task_id: str) -> TaskRecord`\n\nFetches a task by ID or raises 
`TaskNotFoundError`.\n\nImplementation pattern:\n\n- delegates to `find_task()`\n- converts absence into a domain-specific exception\n\nUse this when missing data is exceptional.\n\n### `find_task(executor, task_id: str) -> TaskRecord | None`\n\nFetches a task by ID and returns `None` if not found.\n\nUse this when absence is expected or part of control flow.\n\n### `list_tasks(executor, *, limit: int = 100) -> list[TaskRecord]`\n\nReturns tasks ordered by newest first:\n\n```sql\nORDER BY created_at DESC, updated_at DESC, task_id ASC\nLIMIT $1\n```\n\nThis is the general-purpose listing query used in repository tests and service-level task inspection.\n\n### `find_running_task(executor) -> TaskRecord | None`\n\nReturns the most recently created task with `status = 'running'`.\n\nThis is a convenience query for systems that assume at most one active task should be considered “current”.\n\n### `find_latest_task_for_origin(executor, origin_task_id: str) -> TaskRecord | None`\n\nReturns the newest task in an origin-linked chain.\n\nThis query is important for follow-up and publication reconciliation flows. 
Services use it to determine the latest persisted owner of a GitHub-linked task lineage.\n\n### `list_running_task_ids(executor) -> list[str]`\n\nReturns all currently running task IDs ordered by oldest creation time first.\n\nThis is a lightweight query intended for operational flows that only need identifiers, not full task records.\n\n### `list_latest_publication_recovery_candidates(executor) -> list[TaskRecord]`\n\nReturns the latest succeeded task per `origin_task_id` that has GitHub linkage and a publication state requiring reconciliation.\n\nQuery characteristics:\n\n- uses `SELECT DISTINCT ON (origin_task_id)`\n- filters to:\n  - `status = 'succeeded'`\n  - `github_repo IS NOT NULL`\n  - `publication_status IN ('published', 'pr_open', 'awaiting_human_review', 'blocked', 'failed')`\n- orders by newest task within each origin group\n\nThis is a domain-specific recovery query, not a generic listing API.\n\n### `find_active_follow_up_by_trigger_key(executor, follow_up_trigger_key: str) -> TaskRecord | None`\n\nLooks up an active follow-up task already associated with a trigger key.\n\nIt filters out terminal states:\n\n```sql\nstatus NOT IN ('succeeded', 'failed', 'cancelled')\n```\n\nThis query supports idempotent follow-up creation. 
The corresponding migration also enforces this at the database level with a partial unique index.\n\n### `list_failed_task_ids_updated_since(executor, updated_since: datetime) -> list[str]`\n\nReturns failed task IDs updated after a given timestamp.\n\nThis is a narrow operational query used for retention/cleanup style workflows where only IDs are needed.\n\n### `update_task(executor, task_id: str, update: TaskUpdate) -> TaskRecord`\n\nApplies a partial update to a task.\n\nKey implementation details:\n\n- only fields in `TASK_UPDATE_FIELDS` are eligible for update\n- `context` is cast with `::jsonb`\n- `updated_at = now()` is always appended when an actual update occurs\n- if no fields are provided, it falls back to `get_task()`\n- if the `UPDATE` affects no rows, it raises `TaskNotFoundError`\n\nThe explicit field allowlist is important: it prevents arbitrary update model contents from becoming SQL assignments.\n\n### `delete_task(executor, task_id: str) -> bool`\n\nDeletes a task row and returns whether anything was removed.\n\nImplementation:\n\n```python\nstatus = await executor.execute(\"DELETE FROM tasks WHERE task_id = $1\", task_id)\nreturn status != \"DELETE 0\"\n```\n\nBecause `artifacts.task_id` has `ON DELETE CASCADE`, deleting a task also deletes its artifacts at the database level.\n\n### `_to_task_record(row: asyncpg.Record) -> TaskRecord`\n\nInternal conversion helper.\n\nBehavior:\n\n- converts row to dict\n- decodes `context`\n- validates with `TaskRecord.model_validate(...)`\n\n### `_encode_json()` / `_decode_json()`\n\nInternal helpers for JSON serialization and deserialization.\n\nThese are intentionally simple and local to the repository module.\n\n## Artifact persistence\n\n## `api/repositories/artifacts.py`\n\nThis module contains all SQL for the `artifacts` table.\n\n### Data shape\n\n`ARTIFACT_COLUMNS` defines the canonical artifact projection:\n\n- `artifact_id`\n- `task_id`\n- `type`\n- `version`\n- `agent`\n- `content`\n- `metadata`\n- 
`created_at`\n\n### `create_artifact(executor, artifact: ArtifactCreate) -> ArtifactRecord`\n\nInserts an artifact row and returns the stored record.\n\nNotable behavior:\n\n- `metadata` is passed as `$7::jsonb`\n- JSON is encoded with `_encode_json()`\n- the inserted row is returned immediately\n\n### `get_artifact(executor, artifact_id: str) -> ArtifactRecord`\n\nFetches an artifact by ID or raises `ArtifactNotFoundError`.\n\nDelegates to `find_artifact()`.\n\n### `find_artifact(executor, artifact_id: str) -> ArtifactRecord | None`\n\nFetches an artifact by ID if present.\n\n### `list_artifacts_for_task(executor, task_id: str) -> list[ArtifactRecord]`\n\nReturns all artifacts for a task ordered by creation time:\n\n```sql\nORDER BY created_at ASC, artifact_id ASC\n```\n\nThis ordering is important for consumers that reconstruct task history or display artifacts chronologically.\n\n### `find_latest_reviewed_head_commit(executor, task_id: str) -> str | None`\n\nReturns the latest persisted `reviewed_head_commit` value from artifact metadata for a task.\n\nThis is the most specialized query in the module. It searches artifacts where:\n\n```sql\nmetadata ? 'reviewed_head_commit'\n```\n\nThen ranks candidates by:\n\n1. artifact type preference:\n   - `code_review` highest\n   - `review` next\n   - everything else lower\n2. `metadata.review_iteration` descending\n3. `created_at` descending\n4. 
`artifact_id` descending\n\nThis lets higher-level review logic recover the most relevant reviewed commit provenance without introducing a separate table.\n\n### `update_artifact(executor, artifact_id: str, update: ArtifactUpdate) -> ArtifactRecord`\n\nApplies a partial update to an artifact.\n\nBehavior mirrors `update_task()`:\n\n- only fields in `ARTIFACT_UPDATE_FIELDS` are considered\n- `metadata` is cast to `::jsonb`\n- empty updates return `get_artifact(...)`\n- missing rows raise `ArtifactNotFoundError`\n\nUnlike tasks, artifacts do not maintain an `updated_at` column, so no timestamp mutation is added.\n\n### `delete_artifact(executor, artifact_id: str) -> bool`\n\nDeletes an artifact row and returns whether a row was removed.\n\n### `_to_artifact_record(row: asyncpg.Record) -> ArtifactRecord`\n\nInternal conversion helper.\n\nBehavior:\n\n- converts row to dict\n- decodes `metadata`\n- validates with `ArtifactRecord.model_validate(...)`\n\n### `_encode_json()` / `_decode_json()`\n\nLocal JSON helpers for artifact metadata.\n\n## Schema management with Alembic\n\n## `alembic/env.py`\n\nThis is the Alembic environment script used when running migration commands.\n\n### Configuration behavior\n\nAt import time:\n\n```python\nconfig = context.config\n```\n\nIf `sqlalchemy.url` is not already configured, the script reads `DATABASE_URL` from the environment and injects it:\n\n```python\nif not config.get_main_option(\"sqlalchemy.url\"):\n    database_url = os.getenv(\"DATABASE_URL\")\n    if database_url:\n        config.set_main_option(\"sqlalchemy.url\", database_url)\n```\n\nThis allows migrations to run without hardcoding the database URL in `alembic.ini`.\n\n### `target_metadata = None`\n\nThere is no SQLAlchemy ORM metadata integration here. 
Migrations are authored manually rather than autogenerated from declarative models.\n\n### `run_migrations_offline()`\n\nConfigures Alembic with a URL only, enabling offline SQL generation.\n\nImportant options:\n\n- `literal_binds=True`\n- `compare_type=True`\n\n### `run_migrations_online()`\n\nCreates an engine from Alembic config and runs migrations against a live connection.\n\nImportant details:\n\n- uses `engine_from_config(...)`\n- uses `pool.NullPool`\n- configures Alembic with `connection=connection`\n- enables `compare_type=True`\n\n### Mode dispatch\n\nAt module end:\n\n```python\nif context.is_offline_mode():\n    run_migrations_offline()\nelse:\n    run_migrations_online()\n```\n\nThis is standard Alembic control flow.\n\n## Migration history\n\n## `20260402_000001_initial_tasks_and_artifacts.py`\n\nCreates the initial `tasks` and `artifacts` schema.\n\n### `tasks` table\n\nColumns:\n\n- `task_id` primary key\n- `thread_id` unique, non-null\n- `type`, `description`, `branch_name` non-null\n- `context` JSONB nullable\n- `status` with default `'running'`\n- `plan_path` nullable\n- `publication_status` with default `'pending'`\n- `published_at`, `publication_error` nullable\n- `result_type`, `result`, `error` nullable\n- `created_at`, `updated_at` default to `now()`\n\nConstraints:\n\n- `ck_tasks_type`: `type IN ('plan', 'implement')`\n- `ck_tasks_status`: `status IN ('running', 'succeeded', 'failed')`\n- `ck_tasks_publication_status`: initially `('pending', 'published', 'failed')`\n\nIndexes:\n\n- `ix_tasks_status_created_at`\n- `ix_tasks_publication_status_created_at`\n- `ix_tasks_branch_name`\n\n### `artifacts` table\n\nColumns:\n\n- `artifact_id` primary key\n- `task_id` non-null foreign key to `tasks.task_id`\n- `type` non-null\n- `version` default `1`\n- `agent`, `content` non-null\n- `metadata` JSONB nullable\n- `created_at` default `now()`\n\nConstraints:\n\n- `ck_artifacts_type`: initially `('review', 'code_review', 'result')`\n- 
`ck_artifacts_version_positive`: `version >= 1`\n- foreign key `fk_artifacts_task_id_tasks` with `ondelete=\"CASCADE\"`\n\nIndexes:\n\n- `ix_artifacts_task_id_created_at`\n- `ix_artifacts_type_created_at`\n\n## `20260403_000002_task_github_linkage.py`\n\nExtends `tasks` with GitHub linkage and follow-up metadata.\n\nAdded columns:\n\n- `github_repo`\n- `github_issue_number`\n- `github_pr_number`\n- `origin_task_id`\n- `follow_up_to_task_id`\n- `follow_up_trigger_key`\n\nAdded self-referential foreign keys:\n\n- `fk_tasks_origin_task_id_tasks`\n- `fk_tasks_follow_up_to_task_id_tasks`\n\nBoth use `ondelete=\"SET NULL\"`.\n\n### Partial unique index for active follow-ups\n\nThis migration adds:\n\n- `idx_tasks_follow_up_trigger_key_active`\n\nIt is unique only for rows matching:\n\n```sql\nfollow_up_trigger_key IS NOT NULL\nAND status NOT IN ('succeeded', 'failed', 'cancelled')\n```\n\nThis is a key persistence invariant: only one active task may claim a given follow-up trigger key.\n\n### Expanded publication status constraint\n\nThe migration replaces `ck_tasks_publication_status` so `publication_status` can be one of:\n\n- `pending`\n- `published`\n- `pr_open`\n- `awaiting_human_review`\n- `blocked`\n- `merged`\n- `failed`\n\nThis aligns the schema with GitHub publication lifecycle tracking.\n\n### Note on status values\n\nRepository queries such as `find_active_follow_up_by_trigger_key()` and the partial unique index reference `'cancelled'`, but the original `ck_tasks_status` from the initial migration only allows:\n\n- `running`\n- `succeeded`\n- `failed`\n\nThat means the schema and query logic currently assume a broader lifecycle than the check constraint explicitly permits. 
If task cancellation is intended to be persisted, the task status constraint will need a migration update.\n\n## `20260408_000003_add_verification_artifact_type.py`\n\nExtends `ck_artifacts_type` to allow:\n\n- `verification`\n\nNo new table is introduced; verification remains an artifact subtype stored in `artifacts`.\n\n## How this layer connects to the rest of the codebase\n\nThe repository layer is used directly by services, routes, and tests.\n\n### Task-oriented flows\n\nSeveral service flows depend on task repository lookups beyond basic CRUD:\n\n- publication reconciliation uses `find_latest_task_for_origin()`\n- follow-up creation uses `find_latest_task_for_origin()` and `find_active_follow_up_by_trigger_key()`\n- publication recovery scans use `list_latest_publication_recovery_candidates()`\n- operational cleanup uses `list_failed_task_ids_updated_since()`\n\nA representative flow is:\n\n1. route handler calls a service\n2. service resolves the latest task in a lineage\n3. repository fetches the row\n4. `_to_task_record()` decodes JSON and validates the model\n\n### Artifact-oriented flows\n\nArtifact persistence is used for:\n\n- storing generated outputs\n- storing reviews and code reviews\n- storing verification artifacts\n- reconstructing task history for API responses\n- recovering review provenance via `find_latest_reviewed_head_commit()`\n\n### Startup safety\n\nThe expected startup pattern is:\n\n1. obtain database URL\n2. call `validate_database_schema(database_url)`\n3. create the shared pool with `create_pool(database_url)`\n4. pass the pool into services/routes\n5. close it with `close_pool(pool)` on shutdown\n\nThis ensures the app fails fast if migrations have not been applied.\n\n## SQL and data integrity choices\n\n## Manual SQL over ORM\n\nThis layer uses `asyncpg` directly rather than SQLAlchemy ORM sessions. 
Benefits of the current approach:\n\n- explicit SQL for every query\n- low abstraction overhead\n- easy control over ordering, filtering, and PostgreSQL-specific JSONB behavior\n- straightforward use of PostgreSQL features like:\n  - `JSONB`\n  - `DISTINCT ON`\n  - partial unique indexes\n  - JSON operators such as `?` and `->>`\n\nThe tradeoff is that schema changes must be reflected manually in:\n\n- migrations\n- `TASK_COLUMNS` / `ARTIFACT_COLUMNS`\n- insert statements\n- update allowlists\n- model conversion expectations\n\n## Database-enforced invariants\n\nImportant invariants are enforced in the schema, not just in Python:\n\n- valid task types via `ck_tasks_type`\n- valid task statuses via `ck_tasks_status`\n- valid publication statuses via `ck_tasks_publication_status`\n- valid artifact types via `ck_artifacts_type`\n- positive artifact versions via `ck_artifacts_version_positive`\n- one task per `thread_id` via unique constraint\n- one active follow-up per trigger key via partial unique index\n- artifact ownership via foreign key to `tasks`\n- cascade deletion of artifacts when a task is deleted\n\nThese constraints are essential because repository functions generally assume the database will reject invalid states.\n\n## Contributing guidelines\n\nWhen changing this module, keep the following in sync.\n\n### If you add a task column\n\nUpdate all of:\n\n- Alembic migration(s)\n- `TASK_COLUMNS`\n- `create_task()`\n- `TASK_UPDATE_FIELDS` if the field should be mutable\n- `_to_task_record()` only if special decoding is needed\n- `TaskCreate`, `TaskUpdate`, and `TaskRecord` models\n\n### If you add an artifact column\n\nUpdate all of:\n\n- Alembic migration(s)\n- `ARTIFACT_COLUMNS`\n- `create_artifact()`\n- `ARTIFACT_UPDATE_FIELDS` if mutable\n- `_to_artifact_record()` if special decoding is needed\n- `ArtifactCreate`, `ArtifactUpdate`, and `ArtifactRecord` models\n\n### If you add a new enum-like status/type value\n\nCheck both:\n\n- database check 
constraints in migrations\n- repository queries that hardcode lifecycle filters or sort precedence\n\nExamples in this module:\n\n- `ck_tasks_status`\n- `ck_tasks_publication_status`\n- `ck_artifacts_type`\n- `find_active_follow_up_by_trigger_key()`\n- `list_latest_publication_recovery_candidates()`\n- `find_latest_reviewed_head_commit()`\n\n### If you add a new JSON field\n\nFollow the existing pattern:\n\n- cast parameter with `::jsonb` in SQL\n- encode with `_encode_json()`\n- decode in the row conversion helper before model validation\n\n### If you add a new specialized query\n\nPrefer:\n\n- returning validated `TaskRecord` / `ArtifactRecord` objects when full rows are needed\n- returning primitive values only when that materially reduces overhead, as in:\n  - `list_running_task_ids()`\n  - `list_failed_task_ids_updated_since()`\n  - `find_latest_reviewed_head_commit()`\n\n## Known sharp edges\n\n### Task status constraint vs query logic\n\nAs noted above, some logic references `'cancelled'`, but the schema check constraint shown in the migrations does not permit it. This mismatch should be resolved before relying on persisted cancelled tasks.\n\n### `updated_at` maintenance is application-driven\n\n`tasks.updated_at` is updated in `update_task()` by appending `updated_at = now()` to the SQL. There is no trigger in the database enforcing this automatically.\n\nThat means:\n\n- updates performed outside this repository may leave `updated_at` stale\n- any future direct SQL writes should preserve this convention\n\n### No ORM metadata in Alembic\n\nBecause `target_metadata = None`, Alembic autogeneration from SQLAlchemy models is not part of this setup. 
Schema evolution is migration-first and manual.\n\n## Summary\n\nThe persistence and database layer is intentionally small and explicit:\n\n- Alembic defines and evolves the PostgreSQL schema\n- `database.py` manages pool creation and schema compatibility checks\n- `tasks.py` and `artifacts.py` provide typed, async, SQL-backed repository functions\n- services and routes depend on these repositories for both CRUD and domain-specific lookup behavior\n\nThe module’s main design principle is explicitness: explicit migrations, explicit SQL, explicit JSON handling, and explicit conversion into validated record models.","repository-context-and-memory-management":"# Repository Context and Memory Management\n\n# Repository Context and Memory Management\n\nThis module group does two related jobs:\n\n- **Repository context discovery**: reads a bounded, sanitized set of instruction files from a trusted Git snapshot so those instructions can be injected into prompts.\n- **Governed workspace memory**: manages durable `MEMORY.md` and append-only `HISTORY.md` files under a workspace-local `memory/` directory, with explicit write controls, size budgets, and trusted consolidation.\n\nThe code is split across:\n\n- `api/context/discovery.py`\n- `api/models/repository_context.py`\n- `api/memory/governance.py`\n- `api/memory/consolidation.py`\n\nThe package `__init__.py` files re-export the public entry points.\n\n---\n\n## Design goals\n\n### Repository context\nRepository context discovery is intentionally:\n\n- **deterministic**: files are discovered from a specific Git ref, not the mutable working tree\n- **bounded**: both per-file and total character budgets are enforced\n- **sanitized**: ANSI escapes, control characters, and Markdown frontmatter in `.mdc` files are removed\n- **ordered**: discovery follows a fixed priority list of well-known instruction paths\n\n### Memory management\nWorkspace memory is intentionally:\n\n- **governed**: direct writes to `MEMORY.md` and 
`HISTORY.md` are blocked unless policy allows them\n- **trusted-flow aware**: internal maintenance paths can bypass normal agent write restrictions\n- **budgeted**: both memory and history files have configurable size limits\n- **append-oriented**: operational history is recorded as bounded single-line entries\n- **consolidated**: durable facts are promoted from history into memory in a controlled pass\n\n---\n\n## Repository context discovery\n\n## Public API\n\nExported from `api/context/__init__.py`:\n\n- `discover_repository_context`\n- `resolve_trusted_snapshot_ref`\n- `strip_frontmatter`\n\n## Data model\n\nDefined in `api/models/repository_context.py`.\n\n### `WELL_KNOWN_REPOSITORY_INSTRUCTION_PATHS`\n\nA fixed tuple of glob patterns used to discover repository instructions:\n\n- `AGENTS.md`\n- `CLAUDE.md`\n- `.cursor/rules/*.mdc`\n- `.cursor/rules/*.md`\n- `.cursorrules`\n- `CONTRIBUTING.md`\n- `.github/CONTRIBUTING.md`\n- `.github/CODEOWNERS`\n- `README.md`\n\nThis list defines both **what** is considered repository guidance and the **priority order** in which files are included.\n\n### `RepoContextConfig`\n\nControls discovery behavior:\n\n- `enabled: bool = True`\n- `per_file_max_chars: int = 8000`\n- `total_max_chars: int = 30000`\n\nIf `enabled` is `False`, discovery returns an empty `RepositoryContext`.\n\n### `InstructionFile`\n\nRepresents one discovered file:\n\n- `relative_path`\n- `content`\n- `char_count`\n\n### `RepositoryContext`\n\nThe final bounded payload:\n\n- `instruction_files: list[InstructionFile]`\n- `total_chars: int`\n- `skipped_files: list[str]`\n\n`skipped_files` records files that matched discovery but were omitted because the total budget would be exceeded.\n\n---\n\n## Discovery flow\n\nThe main entry point is `discover_repository_context(clone_path, snapshot_ref, config=None)`.\n\n### High-level behavior\n\n1. Resolve config, defaulting to `RepoContextConfig()`.\n2. 
Return an empty `RepositoryContext` immediately if discovery is disabled.\n3. Enumerate candidate paths from the trusted Git snapshot with `_discover_instruction_paths`.\n4. For each discovered path:\n   - read file contents from the snapshot with `_read_file_from_snapshot`\n   - sanitize content with `_sanitize_instruction_content`\n   - skip empty results\n   - truncate to `per_file_max_chars` with `_truncate_content`\n   - stop once adding the file would exceed `total_max_chars`\n5. Return a `RepositoryContext` containing included files, total character count, and any skipped paths.\n\n### Why snapshot-based discovery matters\n\nThe implementation never reads instruction files directly from the working tree. It uses Git commands against `snapshot_ref`:\n\n- `_list_snapshot_paths()` runs `git ls-tree -r --name-only <snapshot_ref>`\n- `_read_file_from_snapshot()` runs `git show <snapshot_ref>:<relative_path>`\n\nThat makes discovery stable and reviewable against a trusted ref rather than whatever happens to be in the working tree.\n\n### Trusted ref resolution\n\n`resolve_trusted_snapshot_ref(base_branch: str) -> str` currently performs minimal validation:\n\n- trims whitespace\n- rejects blank values with `ValueError`\n- returns the trimmed branch name without further changes\n\nThis function is small, but it establishes the contract that repository context discovery should anchor to a **trusted base branch/ref**, not arbitrary mutable state.\n\n---\n\n## Sanitization and truncation\n\n### `_sanitize_instruction_content`\n\nSanitization is applied in this order:\n\n1. normalize line endings:\n   - `\\r\\n` → `\\n`\n   - `\\r` → `\\n`\n2. remove ANSI escape sequences via `_strip_ansi`\n3. remove control characters via `_strip_control_characters`\n   - preserves `\\n` and `\\t`\n   - drops other Unicode control-category characters\n4. if the file path ends with `.mdc`, remove leading YAML frontmatter with `strip_frontmatter`\n5. 
trim surrounding whitespace with `.strip()`\n\nThis is important because repository instruction files may contain terminal formatting, editor metadata, or generated content that should not be injected into prompts.\n\n### `strip_frontmatter`\n\n`strip_frontmatter(document: str) -> str` removes only **leading** YAML frontmatter from Markdown-like content:\n\n- the first line must be exactly `---` after stripping\n- it scans for the next `---`\n- content after that closing delimiter is returned, left-stripped\n- if no closing delimiter is found, the original document is returned unchanged\n\nThis behavior is intentionally conservative.\n\n### `_truncate_content`\n\nIf content exceeds `max_chars`, it is truncated and suffixed with:\n\n```text\n[truncated]\n```\n\nThe actual marker is stored as `_TRUNCATION_MARKER = \"\\n[truncated]\"`.\n\nBehavior details:\n\n- if content already fits, it is returned unchanged\n- if `max_chars` is smaller than the marker length, the marker itself is sliced\n- otherwise the content is cut to leave room for the marker, trailing whitespace is stripped, and the marker is appended\n\nThis preserves a clear signal that the file was clipped.\n\n---\n\n## Path discovery and ordering\n\n### `_discover_instruction_paths`\n\nThis function combines:\n\n- the full path list from `_list_snapshot_paths`\n- the ordered patterns in `WELL_KNOWN_REPOSITORY_INSTRUCTION_PATHS`\n\nFor each pattern, it:\n\n- matches snapshot paths using `fnmatchcase`\n- sorts matches lexically\n- excludes paths already seen\n- appends them to the final list\n\nThat gives deterministic ordering with two layers:\n\n1. pattern priority from `WELL_KNOWN_REPOSITORY_INSTRUCTION_PATHS`\n2. 
lexical ordering within each pattern\n\nA path matched by an earlier pattern is not duplicated later.\n\n---\n\n## Git execution and error handling\n\n### `_run_git_raw`\n\nAll Git access funnels through `_run_git_raw(clone_path, *args)`:\n\n- launches `git` with `asyncio.create_subprocess_exec`\n- sets `cwd` to `clone_path`\n- captures `stdout` and `stderr`\n- raises `GitOperationError` if the command exits non-zero\n\nThe error includes:\n\n- the full Git command\n- the clone path\n- decoded stderr text, or a fallback message\n\n### `_run_git`\n\n`_run_git` wraps `_run_git_raw` and decodes UTF-8 text with replacement for invalid bytes, then strips surrounding whitespace.\n\n### Encoding behavior\n\n`_read_file_from_snapshot` decodes file bytes with:\n\n```python\noutput.decode(\"utf-8\", errors=\"replace\")\n```\n\nThis avoids hard failures on imperfect repository content while still producing a usable prompt payload.\n\n---\n\n## Repository context execution sketch\n\n```mermaid\nflowchart TD\n    A[discover_repository_context] --> B[_discover_instruction_paths]\n    B --> C[_list_snapshot_paths]\n    C --> D[_run_git]\n    D --> E[_run_git_raw]\n    A --> F[_read_file_from_snapshot]\n    F --> E\n    A --> G[_sanitize_instruction_content]\n    G --> H[strip_frontmatter]\n    A --> I[_truncate_content]\n    A --> J[RepositoryContext]\n```\n\n---\n\n## Governed workspace memory\n\n## Public API\n\nExported from `api/memory/__init__.py`:\n\n- `HISTORY_FILE_NAME`\n- `MEMORY_FILE_NAME`\n- `MEMORY_HEADER`\n- `MemoryConsolidationResult`\n- `MemoryFact`\n- `MemoryWritePolicy`\n- `append_history_entry`\n- `append_task_history_summary`\n- `assert_memory_payload_budget`\n- `assert_memory_size_budget`\n- `assert_memory_write_allowed`\n- `build_task_history_entry`\n- `consolidate_memory_from_history`\n- `get_memory_file_path`\n- `is_governed_memory_path`\n- `rotate_history_if_needed`\n\n---\n\n## Governance model\n\nThe governance layer in `api/memory/governance.py` defines 
where memory lives, who may write it, and how size limits are enforced.\n\n### Governed files\n\nTwo filenames are treated specially:\n\n- `MEMORY.md`\n- `HISTORY.md`\n\nThey live under:\n\n```text\n<workspace_root>/memory/\n```\n\n### `MemoryWritePolicy`\n\n`MemoryWritePolicy` is the central configuration object:\n\n- `allow_agent_memory_writes: bool = False`\n- `require_approval_for_memory: bool = True`\n- `max_memory_size_kb: int = 8`\n- `max_history_size_kb: int = 500`\n- `max_history_entry_chars: int = 600`\n\nThe defaults are restrictive:\n\n- normal agent flows cannot directly write governed memory\n- approval is required even if agent writes are enabled\n- memory and history are bounded independently\n\n---\n\n## Canonical path handling\n\n### `get_memory_file_path`\n\n`get_memory_file_path(workspace_root, file_name)` returns the canonical path for `MEMORY.md` or `HISTORY.md`.\n\nImplementation details:\n\n- resolves `workspace_root`\n- resolves `<workspace_root>/memory`\n- resolves the final file path\n- verifies the file path is still inside the memory directory using `relative_to`\n\nIf path resolution escapes the governed directory, it raises `MemoryGovernanceError`.\n\nThis is a path-safety guard, not just a convenience helper.\n\n### `is_governed_memory_path`\n\n`is_governed_memory_path(path, workspace_root)` checks whether a resolved path is exactly one of:\n\n- `get_memory_file_path(workspace_root, MEMORY_FILE_NAME)`\n- `get_memory_file_path(workspace_root, HISTORY_FILE_NAME)`\n\nOnly those exact files are treated as governed memory targets.\n\n---\n\n## Write authorization\n\n### `assert_memory_write_allowed`\n\nThis function is the main gatekeeper for writes.\n\nSignature:\n\n```python\nassert_memory_write_allowed(\n    path,\n    *,\n    workspace_root,\n    policy=None,\n    trusted_flow=False,\n    has_approval=False,\n) -> Path\n```\n\nBehavior:\n\n- if `path` is **not** a governed memory path, it returns the resolved path unchanged\n- if 
`trusted_flow=True`, it allows the write immediately\n- otherwise, governed writes are allowed only when:\n  - `policy.allow_agent_memory_writes` is `True`, and\n  - either `policy.require_approval_for_memory` is `False` or `has_approval=True`\n- when allowing a normal governed write, it also calls `assert_memory_size_budget`\n- otherwise it raises `MemoryGovernanceError`\n\n### Trusted vs normal flows\n\nThis distinction is central to the module:\n\n- **normal task execution** should not mutate durable memory directly\n- **trusted internal maintenance flows** such as history append and consolidation can write governed files by passing `trusted_flow=True`\n\nThat pattern appears throughout the module.\n\n---\n\n## Size and payload budgets\n\n### `assert_memory_size_budget`\n\nChecks whether the current file size plus `additional_bytes` would exceed the configured budget.\n\nUsed for preflight checks on existing files.\n\n### `assert_memory_payload_budget`\n\nChecks whether a complete payload string would exceed the configured budget after UTF-8 encoding.\n\nUsed before writing full file contents.\n\n### `_get_memory_budget_bytes`\n\nMaps file names to policy budgets:\n\n- `MEMORY.md` → `max_memory_size_kb * 1024`\n- `HISTORY.md` → `max_history_size_kb * 1024`\n\nUnknown file names return `None`, meaning no governed budget applies.\n\n---\n\n## History writing\n\n`HISTORY.md` is the operational log. 
The governance layer supports both structured task summaries and raw trusted entries.\n\n### `build_task_history_entry`\n\nBuilds one grep-friendly single-line entry from a `TaskRecord`.\n\nIncluded fields:\n\n- timestamp from `written_at` or `task.updated_at`\n- `task=<task_id>`\n- `type=<task.type>`\n- `status=<task.status>`\n- optional `result=<task.result_type>`\n- `publication=<task.publication_status>`\n- detail text from:\n  - `task.error`, else\n  - `task.result`, else\n  - `task.description`\n\nNormalization rules:\n\n- control characters are collapsed to spaces by `_normalize_history_text`\n- token fields are normalized by `_normalize_history_token`\n- the final line is bounded by `_truncate_history_entry(policy.max_history_entry_chars)`\n\nThe output format is:\n\n```text\n- 2024-01-01T00:00:00Z task=... type=... status=... publication=...: detail\n```\n\n### `append_task_history_summary`\n\nConvenience wrapper that:\n\n1. calls `build_task_history_entry`\n2. passes the result to `append_history_entry`\n\n### `append_history_entry`\n\nAppends one already-formatted trusted entry to `HISTORY.md`.\n\nImportant details:\n\n- it resolves the canonical history path with `get_memory_file_path`\n- it authorizes the write with `assert_memory_write_allowed(..., trusted_flow=True)`\n- it performs the actual file mutation in a worker thread via `asyncio.to_thread(_append_history_entry, ...)`\n\n### `_append_history_entry`\n\nThis is where append semantics and rotation are enforced:\n\n1. read existing history text if present\n2. build the append payload with `_build_history_append_payload`\n3. compute the final text\n4. if the final text would exceed the history budget:\n   - archive the current file with `_rotate_history_file`\n   - rebuild the payload against an empty active history\n5. validate the final payload with `assert_memory_payload_budget`\n6. ensure parent directories exist\n7. 
write the final text\n\n### `_build_history_append_payload`\n\nEnsures the file is always well-formed:\n\n- if the file is empty, prepend `HISTORY_HEADER`\n- otherwise append the entry with correct newline handling\n\n### Rotation behavior\n\n`rotate_history_if_needed` and `_append_history_entry` both rely on the same rotation primitives.\n\n- `rotate_history_if_needed(...)` proactively rotates an oversized active history file\n- `_append_history_entry(...)` rotates reactively if appending a new entry would overflow the budget\n\nArchived files are named by `_build_history_archive_path`:\n\n```text\nHISTORY.YYYYMMDDTHHMMSSZ.md\nHISTORY.YYYYMMDDTHHMMSSZ.1.md\nHISTORY.YYYYMMDDTHHMMSSZ.2.md\n...\n```\n\nAfter rotation, the active file is reset to `HISTORY_HEADER`.\n\n---\n\n## Memory consolidation\n\n`api/memory/consolidation.py` promotes durable facts from `HISTORY.md` into `MEMORY.md`.\n\nThis is a **trusted maintenance flow**, not a general-purpose write path.\n\n## Data types\n\n### `MemoryFact`\n\nRepresents one durable fact:\n\n- `category`\n- `statement`\n\nUseful computed properties:\n\n- `line` → renders as `- {category}: {statement}`\n- `normalized_category` → normalized via `_normalize_memory_category`\n\nThe model is frozen (`ConfigDict(frozen=True)`), so facts are immutable once created.\n\n### `MemoryConsolidationResult`\n\nSummarizes one consolidation pass:\n\n- `promoted: list[str]`\n- `replaced: list[str]`\n- `unchanged: list[str]`\n- `archive_path: str | None`\n\n`archive_path` is populated if history rotation occurred during consolidation.\n\n---\n\n## Consolidation flow\n\n### `consolidate_memory_from_history`\n\nThis is the main entry point.\n\nBehavior:\n\n1. resolve `MemoryWritePolicy`\n2. resolve and authorize canonical paths for:\n   - `MEMORY.md`\n   - `HISTORY.md`\n   using `assert_memory_write_allowed(..., trusted_flow=True)`\n3. read both files concurrently with `asyncio.gather(... asyncio.to_thread(...))`\n4. 
extract candidate durable facts from history with `_collect_candidate_facts`\n5. rotate oversized history with `rotate_history_if_needed`\n6. if no candidate facts were found:\n   - return `MemoryConsolidationResult`, possibly including `archive_path`\n7. merge candidate facts into existing memory with `_merge_memory_facts`\n8. if memory changed:\n   - validate the full payload with `assert_memory_payload_budget`\n   - write updated memory\n   - for each replaced fact, append a trusted supersession entry to history using `append_history_entry`\n9. return a `MemoryConsolidationResult`\n\nA subtle but important detail: history rotation happens even if no facts are promoted, so consolidation also acts as a maintenance pass for oversized history.\n\n---\n\n## Fact extraction rules\n\n### `_collect_candidate_facts`\n\nScans history entries and keeps only the **latest fact per normalized category**.\n\nImplementation detail:\n\n- it iterates over `_iter_history_entries(history_text)`\n- extracts facts with `_extract_fact_from_history_entry`\n- if a category is seen again, the older fact is removed and the newer one wins\n- insertion order is preserved so the final list reflects latest surviving facts\n\n### `_iter_history_entries`\n\nOnly lines starting with `\"- \"` are considered history entries.\n\n### `_extract_fact_from_history_entry`\n\nParses the detail portion after the first `\": \"` in a history line.\n\nIt rejects entries when:\n\n- there is no `\": \"` separator\n- the normalized detail is empty\n- the detail ends with `...` (treated as truncated / unreliable)\n\nIt then tries two prefix sets.\n\n#### Explicit prefixes\n\n`EXPLICIT_FACT_PREFIXES`:\n\n- `\"repo convention:\"` → `\"Repo convention\"`\n- `\"repository convention:\"` → `\"Repo convention\"`\n- `\"operator preference:\"` → `\"Operator preference\"`\n- `\"workflow default:\"` → `\"Workflow default\"`\n\nFor these, the prefix is stripped and the remainder becomes the fact statement.\n\n#### 
Heuristic prefixes\n\n`HEURISTIC_FACT_PREFIXES`:\n\n- `\"this repo uses \"` → `\"Repo convention\"`\n- `\"the team prefers \"` → `\"Operator preference\"`\n\nFor these, the full normalized detail becomes the statement.\n\n### `_clean_fact_statement`\n\nNormalizes candidate statements by:\n\n- collapsing whitespace\n- trimming surrounding spaces and hyphens\n- rejecting empty values\n- rejecting values ending in `...`\n- rejecting values longer than `MAX_DURABLE_FACT_CHARS` (`240`)\n- appending a trailing period if the statement lacks terminal punctuation\n\nThis keeps durable memory concise and complete.\n\n---\n\n## Merging facts into `MEMORY.md`\n\n### `_merge_memory_facts`\n\nThis function merges extracted facts into the existing memory document and returns:\n\n```python\n(updated_memory, promoted, replaced, unchanged)\n```\n\nWhere:\n\n- `updated_memory: str` is the full rendered document\n- `promoted: list[str]` are newly added facts\n- `replaced: list[tuple[str, str]]` are `(old_fact, new_fact)` pairs\n- `unchanged: list[str]` are facts already present exactly\n\n### Merge strategy\n\n1. Initialize document lines with `_ensure_memory_document_lines`\n   - empty memory becomes `MEMORY_HEADER`\n2. Build a set of normalized exact fact lines\n3. Build a category-to-line index with `_index_memory_categories`\n4. For each candidate fact:\n   - if an exact normalized line already exists, mark it `unchanged`\n   - else if no fact exists for that category, append it and mark `promoted`\n   - else replace the existing category line and mark `replaced`\n5. 
Render the final document with `_render_memory_document`\n\n### Category matching\n\nCategory replacement is based on `fact.normalized_category`, which uses `_normalize_memory_category`:\n\n- trim\n- collapse internal whitespace\n- lowercase\n\nThis means formatting differences in a category, such as extra whitespace or different letter case, do not create duplicate categories.\n\n### Existing memory parsing\n\n`_index_memory_categories` uses `_parse_memory_line` to recognize existing fact lines.\n\nA valid memory fact line must:\n\n- start with `- `\n- contain a `:`\n- have a non-empty category\n- have a valid cleaned statement\n\nNon-fact lines are ignored, so the document can contain headers or other text without breaking consolidation.\n\n### Replacement audit trail\n\nWhen a fact is replaced, `consolidate_memory_from_history` appends a trusted history entry built by `_build_replacement_history_entry`.\n\nThat entry records:\n\n- the consolidation timestamp\n- `source=memory_consolidation`\n- `event=superseded_fact`\n- the old and new fact text\n\nThis creates an audit trail for durable memory changes.\n\n---\n\n## How the pieces connect\n\nThe repository context and memory systems are separate, but they serve a common purpose: supplying bounded, trustworthy context to the agent.\n\n- **Repository context** captures instructions from the target repository itself.\n- **Workspace memory** captures durable operational knowledge accumulated over time.\n\nRepository context is read from a trusted Git snapshot and is effectively external input.\nWorkspace memory is internal state, protected by governance rules and, by default, updated only through trusted flows.\n\n### Typical repository-context path\n\n- caller resolves a trusted base ref with `resolve_trusted_snapshot_ref`\n- caller invokes `discover_repository_context`\n- returned `RepositoryContext` is later formatted into prompts elsewhere in the codebase\n\n### Typical memory-history path\n\n- task finalization calls 
`append_task_history_summary`\n- a bounded line is appended to `memory/HISTORY.md`\n- later, a trusted maintenance step calls `consolidate_memory_from_history`\n- durable facts are promoted into `memory/MEMORY.md`\n- oversized history may be archived during append or consolidation\n\n---\n\n## Error handling\n\n### `GitOperationError`\n\nRaised by `_run_git_raw` when Git commands fail during repository context discovery.\n\nTypical causes:\n\n- invalid `snapshot_ref`\n- missing repository\n- missing object/path in the snapshot\n\n### `MemoryGovernanceError`\n\nRaised by governance checks when:\n\n- a governed path escapes the workspace memory directory\n- direct writes to governed memory are attempted without permission\n- a payload exceeds configured size budgets\n\nThis is the main enforcement mechanism for memory safety.\n\n---\n\n## Contribution notes\n\n### When changing repository context discovery\n\nBe careful to preserve:\n\n- deterministic ordering from `WELL_KNOWN_REPOSITORY_INSTRUCTION_PATHS`\n- snapshot-based reads rather than working-tree reads\n- sanitization before truncation\n- total-budget enforcement with accurate `skipped_files`\n\nIf you add new instruction file patterns, remember that their position in `WELL_KNOWN_REPOSITORY_INSTRUCTION_PATHS` changes prompt priority.\n\n### When changing memory governance\n\nBe careful to preserve the distinction between:\n\n- `trusted_flow=True` internal maintenance writes\n- normal agent writes governed by `MemoryWritePolicy`\n\nMost bugs in this area would either:\n- accidentally block trusted maintenance, or\n- accidentally allow unrestricted writes to `MEMORY.md` / `HISTORY.md`\n\n### When changing consolidation\n\nPay attention to the extraction heuristics:\n\n- explicit prefixes are intentionally narrow\n- heuristic prefixes are intentionally conservative\n- truncated history details (`...`) are ignored to avoid promoting incomplete facts\n\nAlso preserve the audit behavior for replacements. 
The history entry written by `_build_replacement_history_entry` is part of the traceability story for durable memory.\n\n---\n\n## Quick reference\n\n### Repository context\n- `discover_repository_context(...)`: discover and sanitize instruction files from a trusted Git snapshot\n- `resolve_trusted_snapshot_ref(base_branch)`: validate the trusted anchor ref\n- `strip_frontmatter(document)`: remove leading YAML frontmatter\n\n### Memory governance\n- `MemoryWritePolicy`: configure write permissions and budgets\n- `get_memory_file_path(...)`: canonical governed file path\n- `assert_memory_write_allowed(...)`: authorize writes\n- `append_task_history_summary(...)`: append one task-derived history line\n- `append_history_entry(...)`: append one trusted history line\n- `rotate_history_if_needed(...)`: archive oversized history\n\n### Memory consolidation\n- `consolidate_memory_from_history(...)`: promote durable facts from history into memory\n- `MemoryFact`: normalized durable fact\n- `MemoryConsolidationResult`: summary of one consolidation pass","skills-and-extensible-capabilities":"# Skills and Extensible Capabilities\n\n# Skills and Extensible Capabilities\n\nThe `api.skills` package provides workspace-driven extensibility. It discovers skill definitions from the repository, validates their metadata, checks whether their runtime requirements are available, and exposes usable skills as regular tools in the shared tool registry.\n\nA skill is defined by a `SKILL.md` file under `skills/*/SKILL.md`. 
The file contains YAML-like frontmatter describing the skill, followed by markdown body content that the agent layer can load and use.\n\nThis module is the boundary between:\n\n- the filesystem (`skills/*/SKILL.md`)\n- structured metadata (`SkillMeta`, `SkillRecord`)\n- runtime availability checks (`bins`, `env`)\n- the tool registry (`ToolRegistration`, `ToolAdapter`)\n- nested agent execution (`create_skill_agent`, `invoke_agent`)\n\n## Public API\n\n`api/skills/__init__.py` re-exports the main entry points:\n\n- `SkillCatalog`\n- `SkillMeta`\n- `SkillRecord`\n- `SkillRefreshResult`\n- `SkillTool`\n- `build_default_skill_catalog()`\n- `build_skill_tool()`\n- `discover_skill()`\n- `load_skill_content()`\n- `missing_requirements()`\n- `requirements_met()`\n- `skill_tool_name()`\n- `sync_skill_tool_registrations()`\n\n## Skill file format\n\nSkills are discovered from files matching:\n\n- `skills/*/SKILL.md`\n\nA typical file looks like:\n\n```md\n---\nname: my-skill\ndescription: Helps with a specific workflow\nbins: [git, jq]\nenv: [GITHUB_TOKEN]\nalways_load: false\n---\n\n# Instructions\n\nDetailed skill content goes here.\n```\n\n### Supported frontmatter fields\n\nParsed into `SkillMeta`:\n\n- `name: str`\n- `description: str`\n- `bins: list[str] = []`\n- `env: list[str] = []`\n- `always_load: bool = False`\n\n### Frontmatter parser constraints\n\nThe parser in `loader.py` intentionally supports a small subset of YAML rather than a full YAML implementation.\n\nSupported patterns:\n\n- `key: value`\n- quoted or unquoted scalar strings\n- booleans: `true`, `false`\n- inline lists: `[a, b, \"c\"]`\n- comments and blank lines\n\nNot supported:\n\n- nested objects\n- multiline YAML values\n- block lists using `- item`\n- advanced YAML syntax\n\nInvalid frontmatter raises `SkillParseError`.\n\n## Architecture\n\n```mermaid\nflowchart TD\n    A[skills/*/SKILL.md] --> B[loader.py]\n    B --> C[discover_skill]\n    C --> D[SkillRecord]\n    D --> E[SkillCatalog]\n    
E --> F[sync_skill_tool_registrations]\n    F --> G[SkillTool]\n    G --> H[ToolRegistry]\n    G --> I[create_skill_agent / invoke_agent]\n```\n\n## Core data models\n\n## `SkillMeta`\n\nDefined in `api/skills/models.py`.\n\n`SkillMeta` is the validated representation of skill frontmatter.\n\nFields:\n\n- `name`\n- `description`\n- `bins`\n- `env`\n- `always_load`\n\nValidation behavior:\n\n- `name` and `description` are stripped and must not be empty\n- `bins` and `env` entries are stripped\n- `bins` and `env` must not contain empty strings\n- `bins` and `env` must be unique\n\nThe model is frozen via `ConfigDict(frozen=True)`, so discovered metadata is immutable after validation.\n\n## `SkillRecord`\n\nAlso defined in `models.py`.\n\n`SkillRecord` combines metadata with discovery-time state:\n\n- `meta: SkillMeta`\n- `path: Path`\n- `relative_path: str`\n- `summary: str`\n- `availability: Literal[\"available\", \"unavailable\"]`\n- `missing_requirements: list[str]`\n\nConvenience property:\n\n- `name` → `self.meta.name`\n\nImportant distinction:\n\n- `SkillMeta` describes what the skill declares\n- `SkillRecord` describes what was discovered in the current workspace and environment\n\n`summary` is currently set from `metadata.description`.\n\n## `SkillRefreshResult`\n\nReturned by `SkillCatalog.refresh()`.\n\nFields:\n\n- `available: list[str]`\n- `unavailable: list[str]`\n- `retained: list[str]`\n- `invalid: dict[str, str]`\n\nThis gives callers a compact summary of what changed during a refresh cycle.\n\n## Loading and parsing skill files\n\n`api/skills/loader.py` contains the low-level file and frontmatter handling.\n\n## `read_skill_file(skill_path)`\n\nReads the file as UTF-8 text.\n\n- wraps `OSError`\n- raises `SkillParseError` with path context on failure\n\n## `split_frontmatter(document)`\n\nSplits a markdown document into:\n\n- frontmatter text\n- body text\n\nBehavior:\n\n- normalizes line endings\n- requires the first non-empty line to be `---` to 
treat the file as frontmatter-bearing\n- returns `(\"\", body)` if no opening delimiter is present\n- raises `SkillParseError` if the opening delimiter exists but no closing delimiter is found\n\n## `parse_skill_frontmatter(frontmatter)`\n\nParses the supported frontmatter subset into `dict[str, object]`.\n\nValidation includes:\n\n- every non-comment, non-empty line must contain `:`\n- keys must be non-empty\n- duplicate keys are rejected\n- malformed list syntax is rejected\n- empty list items are rejected\n\n## `load_skill_document(skill_path)`\n\nPrimary loader used by discovery.\n\nReturns:\n\n```python\ntuple[dict[str, object], str]\n```\n\nWhere:\n\n- first item is parsed frontmatter\n- second item is stripped markdown body\n\nIt also enforces that frontmatter must exist. If `split_frontmatter()` returns an empty frontmatter block, `load_skill_document()` raises:\n\n- `SkillParseError(\"Skill file is missing YAML frontmatter.\")`\n\n## `load_skill_content(skill_path)`\n\nConvenience helper that returns only the markdown body, without frontmatter.\n\nThis is useful when another layer needs the skill instructions but not the metadata.\n\n## Discovery and catalog management\n\n`api/skills/discovery.py` is responsible for turning files on disk into validated `SkillRecord` instances and maintaining a reload-safe catalog.\n\n## `discover_skill(workspace_root, skill_path, *, environment=None, which_resolver=None)`\n\nDiscovers a single skill file.\n\nExecution flow:\n\n1. resolve `workspace_root` and `skill_path`\n2. call `load_skill_document()`\n3. validate frontmatter with `SkillMeta.model_validate(...)`\n4. compute missing requirements with `missing_requirements(...)`\n5. 
build and return a `SkillRecord`\n\nAvailability is derived directly from requirement resolution:\n\n- no missing requirements → `\"available\"`\n- any missing requirements → `\"unavailable\"`\n\n`relative_path` is computed relative to `workspace_root` when possible.\n\n### Error handling\n\nIf frontmatter cannot be parsed or validated, `discover_skill()` raises `SkillParseError`.\n\nNotably, Pydantic `ValidationError` from `SkillMeta.model_validate()` is converted into `SkillParseError`, so callers only need to handle the skill-specific parse exception.\n\n## `SkillCatalog`\n\n`SkillCatalog` stores the current discovered skill set in memory.\n\nInternal storage:\n\n```python\nself._skills: dict[str, SkillRecord]\n```\n\nThe key is the skill name.\n\n### `get(name)`\n\nReturns a `SkillRecord` by skill name.\n\nRaises:\n\n- `SkillNotFoundError` if the name is not registered\n\n### `list_skills()`\n\nReturns the list of skill names.\n\n### `list_entries()`\n\nReturns the list of `SkillRecord` objects.\n\n### `refresh(workspace_root, *, environment=None, which_resolver=None)`\n\nRefreshes the catalog from the workspace.\n\nThis is the most important method in the package. It is designed to be resilient during live edits of skill files.\n\nBehavior:\n\n1. resolve `workspace_root`\n2. scan `workspace_root / \"skills\"` for `*/SKILL.md`\n3. remember previous entries by resolved path\n4. rediscover each skill with `discover_skill()`\n5. preserve valid skills in `next_skills`\n6. if a file becomes malformed, retain the last known-good entry for that same path when possible\n7. if duplicate skill names are found, mark the later one invalid and optionally retain the previous known-good entry for that path\n8. replace the catalog with the refreshed set\n9. 
return `SkillRefreshResult`\n\n### Last known-good retention\n\nA key design choice is that refresh is not all-or-nothing.\n\nIf a previously valid skill file becomes malformed, `refresh()` does not necessarily drop it immediately. Instead, if there is a prior valid `SkillRecord` for the same file path, that record is retained and reported in `SkillRefreshResult.retained`.\n\nThis makes the system tolerant of transient invalid edits while a developer is modifying `SKILL.md`.\n\n### Duplicate name handling\n\nSkill names must be unique across the workspace.\n\nIf two different files resolve to the same `skill.name`:\n\n- the later conflicting file is marked invalid\n- a warning is logged\n- the previous valid entry for that path may be retained if available\n\n### Logging\n\nInvalid or conflicting skills are skipped with `logger.warning(...)`.\n\n### Availability vs registration\n\n`refresh()` keeps both available and unavailable skills in the catalog. Availability only affects whether a skill is bridged into the tool registry later.\n\n## `build_default_skill_catalog(workspace_root)`\n\nConvenience constructor:\n\n1. creates a new `SkillCatalog`\n2. immediately calls `refresh(workspace_root)`\n3. 
returns the populated catalog\n\nThis is used during app and task-service initialization.\n\n## Requirement resolution\n\n`api/skills/requirements.py` evaluates whether a discovered skill can actually run in the current environment.\n\n## `missing_requirements(skill_meta, *, environment=None, which_resolver=None)`\n\nReturns a list of missing requirements.\n\nChecks:\n\n- each binary in `skill_meta.bins` using `shutil.which` by default\n- each environment variable in `skill_meta.env` using `os.environ` by default\n\nReturned entries are normalized strings:\n\n- missing binary → `bin:<name>`\n- missing environment variable → `env:<name>`\n\nExamples:\n\n```python\n[\"bin:jq\", \"env:GITHUB_TOKEN\"]\n```\n\nDependency injection points:\n\n- `environment`\n- `which_resolver`\n\nThese make discovery deterministic in tests and allow callers to evaluate availability against a custom environment snapshot.\n\n## `requirements_met(skill_meta, *, environment=None, which_resolver=None)`\n\nThin convenience wrapper around `missing_requirements(...)`.\n\nEquivalent to:\n\n```python\nnot missing_requirements(...)\n```\n\n## Bridging skills into tools\n\n`api/skills/bridge.py` turns discovered skills into entries in the shared tool registry.\n\nThis is what makes workspace skills callable through the same tool infrastructure as built-in tools.\n\n## Tool naming\n\n### `SKILL_TOOL_PREFIX`\n\nConstant:\n\n```python\nSKILL_TOOL_PREFIX = \"skill:\"\n```\n\n### `skill_tool_name(skill_name)`\n\nBuilds the stable registry name for a skill:\n\n```python\nf\"skill:{skill_name}\"\n```\n\nThis naming convention is used consistently for registration, permissions, and stale-tool cleanup.\n\n## `SkillTool`\n\n`SkillTool` is the bridge object for one discovered skill.\n\nConstructor:\n\n```python\nSkillTool(skill_path: str | Path, skill_meta: SkillMeta)\n```\n\nOn initialization it:\n\n- resolves `skill_path`\n- stores `skill_meta`\n- builds a `ToolConfig` with:\n  - 
`name=skill_tool_name(skill_meta.name)`\n  - `description=skill_meta.description`\n  - `permissions=[ToolPermission(name=skill_tool_name(skill_meta.name), level=\"read\")]`\n\n### Permission model\n\nThe bridge intentionally grants only a `read` permission level for the skill tool itself.\n\nThis matters because `execute()` inherits the parent agent’s tool environment rather than creating an unrestricted nested agent. The call graph shows that execution flows through `build_tools(...)` and the normal permission filtering path, so skill-backed tools do not bypass the existing permission model.\n\n## `SkillTool.execute(...)`\n\n```python\nasync def execute(\n    self,\n    task: str,\n    context: str = \"\",\n    *,\n    parent_config: AgentConfig,\n    clone_path: str | Path,\n    tool_registry: ToolRegistry,\n) -> str\n```\n\nThis method invokes the skill through the shared agent boundary.\n\nExecution steps:\n\n1. resolve `clone_path`\n2. build inherited tool adapters with `build_tools(parent_config, tool_registry, resolved_clone_path)`\n3. resolve the backend with `resolve_backend(parent_config.backend, resolved_clone_path)`\n4. create a nested skill agent with `create_skill_agent(...)`\n   - passes the parent config\n   - passes the current clone path\n   - passes the shared tool registry\n   - restricts loaded skills to `[str(self.skill_path)]`\n   - overrides backend and tool adapters with inherited values\n5. render the prompt with `render_skill_tool_prompt(task, context)`\n6. invoke the nested agent with `invoke_agent(...)`\n7. extract plain text with `extract_text(...)`\n8. return the extracted text\n\n### Why this design matters\n\nSkills are not executed as arbitrary scripts. 
They are executed as nested agent invocations using the same backend and tool ecosystem as the parent agent.\n\nThat gives the system:\n\n- consistent backend behavior\n- inherited tool access\n- permission enforcement through normal tool-building paths\n- isolation via a dedicated thread id\n\n## `render_skill_tool_prompt(task, context=\"\")`\n\nBuilds the prompt sent to the nested skill agent.\n\nPrompt structure:\n\n- instruction to use the loaded skill\n- `Task:` section\n- optional `Additional context:` section\n- final instruction: `Return only the final result for the caller.`\n\nEmpty task values are normalized to:\n\n- `(no task provided)`\n\n## `build_skill_thread_id(skill_meta)`\n\nCreates a unique nested thread id:\n\n```python\nf\"skill-{skill_meta.name}-{uuid4().hex}\"\n```\n\nThis isolates each skill invocation from other agent threads.\n\n## `to_adapter(build_context)`\n\nConverts a `SkillTool` into a `ToolAdapter`.\n\nIt closes over the `ToolBuildContext` and returns an adapter whose executor signature is:\n\n```python\nasync def _execute(task: str, context: str = \"\") -> str\n```\n\nThat executor forwards to `SkillTool.execute(...)` using:\n\n- `build_context.agent_config`\n- `build_context.clone_path`\n- `build_context.registry`\n\nThis is the point where a discovered skill becomes a concrete, agent-scoped tool implementation.\n\n## `to_registration()`\n\nConverts the bridge into a `ToolRegistration`:\n\n- `config=self.config`\n- `materialize=self.to_adapter`\n\nThis allows the registry to lazily materialize adapters per build context.\n\n## `build_skill_tool(skill)`\n\nConvenience helper that wraps a `SkillRecord` in `SkillTool`.\n\nIt expects a discovered skill record and uses:\n\n- `skill.path`\n- `skill.meta`\n\n## Synchronizing the tool registry\n\n## `sync_skill_tool_registrations(registry, skill_catalog)`\n\nRegisters currently available skills and removes stale skill tools.\n\nBehavior:\n\n1. 
iterate over `skill_catalog.list_entries()`\n2. skip any skill whose `availability != \"available\"`\n3. create a `SkillTool`\n4. register its `ToolRegistration` in the `ToolRegistry`\n5. track active skill tool names\n6. iterate over existing registry tool names\n7. unregister any tool whose name starts with `skill:` but is no longer active\n8. return the list of active skill tool names\n\n### Important implications\n\n- unavailable skills remain in the catalog but are not exposed as tools\n- stale skill tools are actively removed from the registry\n- the registry is treated as a projection of the current available-skill set\n\nThis function is called from application bootstrap and task execution paths, so the tool registry stays aligned with the latest workspace state.\n\n## End-to-end flow\n\nA typical lifecycle looks like this:\n\n1. application or task service builds a catalog with `build_default_skill_catalog()`\n2. `SkillCatalog.refresh()` scans `skills/*/SKILL.md`\n3. each file is parsed by `load_skill_document()`\n4. metadata is validated into `SkillMeta`\n5. requirements are checked with `missing_requirements()`\n6. discovered entries are stored as `SkillRecord`\n7. `sync_skill_tool_registrations()` registers available skills as `skill:<name>` tools\n8. 
when a tool is invoked, `SkillTool.execute()` creates a nested skill agent and returns its text result\n\n## Integration points in the wider codebase\n\nBased on current usage:\n\n- `create_app` calls:\n  - `build_default_skill_catalog()`\n  - `sync_skill_tool_registrations()`\n- `api/services/tasks.py` calls:\n  - `build_default_skill_catalog()` during initialization\n  - `refresh()` before running tasks\n  - `sync_skill_tool_registrations()` to keep tools current\n\nThis means skill discovery is both:\n\n- part of startup/bootstrap\n- refreshed during task execution to reflect workspace changes\n\n## Error model\n\nThe module primarily uses skill-specific exceptions from `api.errors`.\n\n### `SkillParseError`\n\nRaised when:\n\n- a skill file cannot be read\n- frontmatter is missing\n- frontmatter delimiters are malformed\n- frontmatter syntax is invalid\n- metadata validation fails\n\n`refresh()` catches `SkillParseError` per file, logs a warning, and continues.\n\n### `SkillNotFoundError`\n\nRaised by `SkillCatalog.get(name)` when the requested skill is absent.\n\n## Design notes and contributor guidance\n\n## Discovery is intentionally tolerant\n\n`SkillCatalog.refresh()` is designed for developer workflows where skill files may be temporarily invalid during editing. Preserve this behavior when changing refresh logic.\n\nIf you modify refresh behavior, keep these guarantees in mind:\n\n- one bad skill file should not break discovery of other skills\n- previously valid skills should be retained when possible\n- duplicate names should not silently overwrite each other\n\n## The parser is intentionally minimal\n\n`parse_skill_frontmatter()` is not a general YAML parser. 
If you expand the supported syntax, do so carefully and preserve current error clarity.\n\nThe current implementation is easy to reason about and avoids introducing a full YAML dependency surface into skill loading.\n\n## Availability is separate from validity\n\nA skill can be:\n\n- valid but unavailable due to missing binaries or env vars\n- invalid due to malformed metadata or duplicate naming\n\nOnly available skills are registered as tools, but unavailable skills still belong in the catalog so callers can inspect why they are not usable.\n\n## Skill execution stays inside the agent system\n\n`SkillTool.execute()` does not directly interpret skill markdown. Instead, it delegates to `create_skill_agent()` and `invoke_agent()`.\n\nThat boundary is important because it preserves:\n\n- backend consistency\n- prompt composition behavior\n- tool inheritance\n- permission enforcement\n- thread isolation\n\nAvoid shortcuts that bypass the shared agent invocation path unless the architecture intentionally changes.\n\n## Testing hooks are built in\n\nSeveral functions accept injected dependencies for deterministic tests:\n\n- `SkillCatalog.refresh(..., environment=..., which_resolver=...)`\n- `discover_skill(..., environment=..., which_resolver=...)`\n- `missing_requirements(..., environment=..., which_resolver=...)`\n- `requirements_met(..., environment=..., which_resolver=...)`\n\nUse these instead of patching global process state where possible.\n\n## Module map\n\n- `api/skills/__init__.py` — package exports\n- `api/skills/models.py` — immutable skill metadata and discovery result models\n- `api/skills/loader.py` — file reading and frontmatter parsing\n- `api/skills/requirements.py` — binary/env availability checks\n- `api/skills/discovery.py` — workspace scanning and reload-safe catalog refresh\n- `api/skills/bridge.py` — tool-registry integration and nested agent execution\n\n## Example usage\n\n## Build and refresh a catalog\n\n```python\nfrom api.skills.discovery 
import SkillCatalog\n\ncatalog = SkillCatalog()\nresult = catalog.refresh(\"/path/to/workspace\")\n\nprint(result.available)\nprint(result.unavailable)\nprint(result.invalid)\n```\n\n## Load only the skill body\n\n```python\nfrom api.skills.loader import load_skill_content\n\ncontent = load_skill_content(\"/path/to/workspace/skills/my-skill/SKILL.md\")\n```\n\n## Register available skills as tools\n\n```python\nfrom api.skills.discovery import build_default_skill_catalog\nfrom api.skills.bridge import sync_skill_tool_registrations\n\ncatalog = build_default_skill_catalog(\"/path/to/workspace\")\nactive_tools = sync_skill_tool_registrations(tool_registry, catalog)\n```\n\n## Check requirements without full discovery\n\n```python\nfrom api.skills.models import SkillMeta\nfrom api.skills.requirements import missing_requirements\n\nmeta = SkillMeta(\n    name=\"example\",\n    description=\"Example skill\",\n    bins=[\"git\"],\n    env=[\"GITHUB_TOKEN\"],\n)\n\nmissing = missing_requirements(meta)\n```","task-orchestration-and-workflow-execution-graphs":"# Task Orchestration and Workflow Execution — graphs\n\n# Task Orchestration and Workflow Execution — graphs\n\nThe `api.graphs` package defines the LangGraph workflows used to execute tasks.\n\nIt exposes two compiled graph builders:\n\n- `build_planning_graph()` for plan-only tasks\n- `build_implementation_graph()` for tasks that move from planning into coding and verification\n\nIt also exposes the routing functions that determine how execution moves between nodes after review and verification steps:\n\n- `route_after_review()`\n- `route_after_plan_review()`\n- `route_after_verification()`\n- `route_after_code_review()`\n\nThese graphs are the orchestration layer between task state models (`PlanningState`, `ImplementationState`) and the node builders in `api.nodes.*`.\n\n## Module layout\n\n```python\nfrom api.graphs.implementation import (\n    build_implementation_graph,\n    route_after_code_review,\n    
route_after_plan_review,\n    route_after_verification,\n)\nfrom api.graphs.planning import build_planning_graph, route_after_review\n```\n\n`api/graphs/__init__.py` is a thin export surface. The actual workflow definitions live in:\n\n- `api/graphs/planning.py`\n- `api/graphs/implementation.py`\n\n## Responsibilities\n\nThis module is responsible for:\n\n- constructing `StateGraph(...)` instances with the correct state type\n- wiring named workflow nodes to node factories from `api.nodes.*`\n- defining deterministic transitions between nodes\n- compiling the graph, optionally with a PostgreSQL-backed checkpointer\n- enforcing iteration limits through routing logic rather than node internals\n\nThis module is **not** responsible for:\n\n- generating plans, revisions, code, or reviews\n- running verification itself\n- persisting artifacts directly\n- selecting task type at runtime\n\nTask-type selection happens higher up in `api/services/execution.py`, which calls either `build_planning_graph()` or `build_implementation_graph()`.\n\n---\n\n## Planning graph\n\nDefined in `api/graphs/planning.py`.\n\n### Purpose\n\nThe planning graph runs a plan/review/revise loop for tasks that only require a reviewed plan. 
It creates a task branch, generates a plan, reviews it, and either:\n\n- ends successfully on approval\n- ends unsuccessfully on rejection\n- loops through revision until approval/rejection/max iterations\n\n### State type\n\n```python\ngraph = StateGraph(PlanningState)\n```\n\nThe graph expects a `PlanningState`, and its routing logic reads at least:\n\n- `state.review`\n- `state.iteration`\n- `state.max_iterations`\n\n### Nodes\n\n`build_planning_graph()` registers four nodes:\n\n- `\"setup_branch\"` via `build_setup_branch_node(...)`\n- `\"plan\"` via `build_planner_node(...)`\n- `\"review\"` via `build_reviewer_node(...)`\n- `\"revise\"` via `build_reviser_node(...)`\n\n### Execution flow\n\n```mermaid\nflowchart TD\n    A[START] --> B[setup_branch]\n    B --> C[plan]\n    C --> D[review]\n    D -->|approved/rejected/max_iterations| E[END]\n    D -->|revise| F[revise]\n    F --> D\n```\n\n### Graph construction\n\nThe graph is assembled in this order:\n\n```python\ngraph.add_edge(START, \"setup_branch\")\ngraph.add_edge(\"setup_branch\", \"plan\")\ngraph.add_edge(\"plan\", \"review\")\ngraph.add_conditional_edges(\"review\", route_after_review, {...})\ngraph.add_edge(\"revise\", \"review\")\n```\n\nA few details matter here:\n\n- `setup_branch` always runs first, so downstream nodes operate on the task branch prepared by `build_setup_branch_node(...)`.\n- `plan` always feeds into `review`.\n- `review` is the only conditional branch point.\n- `revise` loops back to `review`, not to `plan`. 
The reviser node is expected to update the plan/review context so the next review can evaluate the revised output.\n\n### Routing: `route_after_review(state)`\n\n```python\ndef route_after_review(state: PlanningState) -> PlanningRoute:\n    if state.review is None:\n        return \"revise\"\n    if state.review.status == \"APPROVED\":\n        return \"approved\"\n    if state.review.status == \"REJECTED\":\n        return \"rejected\"\n    if state.iteration >= state.max_iterations:\n        return \"max_iterations\"\n    return \"revise\"\n```\n\nThis function is intentionally deterministic and conservative.\n\n#### Behavior\n\n- If no persisted review exists (`state.review is None`), the graph assumes review output was missing or unparseable and sends execution to `\"revise\"`.\n- If review status is `\"APPROVED\"`, the graph terminates.\n- If review status is `\"REJECTED\"`, the graph terminates.\n- If the iteration limit has been reached, the graph terminates.\n- Otherwise, it loops to revision.\n\n#### Why the `None` fallback matters\n\nTests indicate this fallback is important when review output cannot be parsed into a persisted review object. Instead of crashing or terminating ambiguously, the graph treats that as a revision request.\n\n---\n\n## Implementation graph\n\nDefined in `api/graphs/implementation.py`.\n\n### Purpose\n\nThe implementation graph extends the planning loop into a full implementation workflow:\n\n1. create/setup the task branch\n2. generate a plan\n3. review the plan\n4. revise until the plan is acceptable\n5. write code\n6. run deterministic verification\n7. review the code\n8. 
revise code until approved, rejected, or iteration limits are reached\n\nThis graph reuses the planning loop on a single task branch, then transitions into coding and verification.\n\n### State type\n\n```python\ngraph = StateGraph(ImplementationState)\n```\n\nThe routing logic reads fields including:\n\n- `state.review`\n- `state.iteration`\n- `state.max_iterations`\n- `state.verification`\n- `state.code_review`\n- `state.code_iteration`\n- `state.max_code_iterations`\n\n### Nodes\n\n`build_implementation_graph()` registers seven nodes:\n\n- `\"setup_branch\"` via `build_setup_branch_node(...)`\n- `\"plan\"` via `build_planner_node(...)`\n- `\"review\"` via `build_reviewer_node(...)`\n- `\"revise\"` via `build_reviser_node(...)`\n- `\"code\"` via `build_coder_node(...)`\n- `\"code_review\"` via `build_code_reviewer_node(...)`\n- `\"verify\"` via `build_verification_node(...)`\n\n### Execution flow\n\n```mermaid\nflowchart TD\n    A[START] --> B[setup_branch]\n    B --> C[plan]\n    C --> D[review]\n    D -->|revise| E[revise]\n    E --> D\n    D -->|approved| F[code]\n    F --> G[verify]\n    G -->|retry code| F\n    G -->|reviewable| H[code_review]\n    H -->|revise| F\n    H -->|approved/rejected/max_iterations| I[END]\n    D -->|rejected/max_iterations| I\n    G -->|terminal_failure| I\n```\n\n### Graph construction\n\nThe implementation graph is built with these key transitions:\n\n```python\ngraph.add_edge(START, \"setup_branch\")\ngraph.add_edge(\"setup_branch\", \"plan\")\ngraph.add_edge(\"plan\", \"review\")\ngraph.add_conditional_edges(\"review\", route_after_plan_review, {...})\ngraph.add_edge(\"revise\", \"review\")\ngraph.add_edge(\"code\", \"verify\")\ngraph.add_conditional_edges(\"verify\", route_after_verification, {...})\ngraph.add_conditional_edges(\"code_review\", route_after_code_review, {...})\n```\n\nImportant design choices:\n\n- The planning phase is structurally similar to the standalone planning graph.\n- Approval from plan review does 
**not** end the workflow; it transitions into `\"code\"`.\n- Verification sits between coding and code review.\n- Failed verification generally loops back to `\"code\"` before any code review occurs.\n- Code review can send execution back to `\"code\"` for another implementation pass.\n\n---\n\n## Routing functions in the implementation graph\n\n### `route_after_plan_review(state)`\n\n```python\ndef route_after_plan_review(state: ImplementationState) -> PlanningRoute:\n    if state.review is None:\n        return \"revise\"\n    if state.review.status == \"APPROVED\":\n        return \"code\"\n    if state.review.status == \"REJECTED\":\n        return \"rejected\"\n    if state.iteration >= state.max_iterations:\n        return \"max_iterations\"\n    return \"revise\"\n```\n\nThis is the planning-stage router for implementation tasks.\n\n#### Behavior\n\n- Missing review object → `\"revise\"`\n- Approved plan → `\"code\"`\n- Rejected plan → `\"rejected\"` and terminate\n- Iteration limit reached → `\"max_iterations\"` and terminate\n- Anything else → `\"revise\"`\n\nThis differs from `route_after_review()` in one crucial way: approval advances into coding instead of ending the graph.\n\n---\n\n### `route_after_verification(state)`\n\n```python\ndef route_after_verification(state: ImplementationState) -> VerificationRoute:\n    if state.verification is None:\n        return \"code\"\n    if state.verification.status == \"passed\" or state.verification.baseline_only_failures:\n        return \"code_review\"\n    if state.verification.status == \"terminal_error\":\n        return \"terminal_failure\"\n    return \"code\"\n```\n\nThis router controls the handoff between deterministic verification and human/LLM code review.\n\n#### Behavior\n\n- Missing verification result → retry `\"code\"`\n- Verification passed → `\"code_review\"`\n- Verification has `baseline_only_failures` → `\"code_review\"`\n- Verification status `\"terminal_error\"` → `\"terminal_failure\"` 
and terminate\n- Any other verification outcome → retry `\"code\"`\n\n#### `baseline_only_failures`\n\nThe special case for `state.verification.baseline_only_failures` allows the workflow to proceed to code review even when verification did not fully pass, as long as failures are attributable only to the baseline. This prevents the graph from blocking implementation progress on pre-existing failures.\n\n#### Why verification routes back to code\n\nTests show the intended behavior is to rerun coding after failed verification **before** code review. That keeps code review focused on implementations that are at least verification-clean or reviewable under the baseline-only rule.\n\n---\n\n### `route_after_code_review(state)`\n\n```python\ndef route_after_code_review(state: ImplementationState) -> ImplementationRoute:\n    if state.code_review is None:\n        return \"revise\"\n    if state.code_review.status == \"APPROVED\":\n        return \"approved\"\n    if state.code_review.status == \"REJECTED\":\n        return \"rejected\"\n    if state.code_iteration >= state.max_code_iterations:\n        return \"max_iterations\"\n    return \"revise\"\n```\n\nThis router controls the final implementation loop after code review has been persisted.\n\n#### Behavior\n\n- Missing code review object → `\"revise\"`\n- Approved code review → `\"approved\"` and terminate\n- Rejected code review → `\"rejected\"` and terminate\n- Code iteration limit reached → `\"max_iterations\"` and terminate\n- Otherwise → `\"revise\"`\n\nIn the graph, `\"revise\"` maps back to `\"code\"`:\n\n```python\ngraph.add_conditional_edges(\n    \"code_review\",\n    route_after_code_review,\n    {\n        \"approved\": END,\n        \"rejected\": END,\n        \"max_iterations\": END,\n        \"revise\": \"code\",\n    },\n)\n```\n\nSo although the route label is `\"revise\"`, the actual next node is `\"code\"`. 
This is a naming asymmetry worth knowing when reading the code: the route value expresses the outcome category, while the edge map decides the concrete next node.\n\n---\n\n## Dependency injection and customization\n\nBoth graph builders are designed for dependency injection. They accept optional overrides for node factories and infrastructure dependencies.\n\n## Common parameters\n\nBoth builders accept:\n\n- `settings: Settings`\n- `artifact_executor: asyncpg.Connection | asyncpg.Pool`\n- `checkpointer: AsyncPostgresSaver | None = None`\n- `git_client: GitClient | None = None`\n- `agent_registry: AgentRegistry | None = None`\n- `tool_registry: ToolRegistry | None = None`\n\n### `artifact_executor`\n\nPassed through to nodes that need database-backed artifact persistence.\n\n### `checkpointer`\n\nIf provided, passed to `graph.compile(checkpointer=checkpointer, ...)`. This enables checkpointed execution and recovery, which is used by task execution and startup recovery flows.\n\n### `git_client`\n\nPassed into branch setup and other git-aware nodes. This is part of how task execution stays tied to a task branch.\n\n### `agent_registry` and `tool_registry`\n\nThese are forwarded into agent-producing nodes such as planner, reviewer, reviser, coder, and code reviewer. 
They connect graph construction to the agent/tool configuration system used elsewhere in the codebase.\n\nTests and execution flows show that graph construction can trigger downstream registry/tool resolution through node builders.\n\n## Planning-specific injectable factories\n\n`build_planning_graph()` accepts:\n\n- `planner_agent_factory`\n- `reviewer_agent_factory`\n- `reviser_agent_factory`\n\nThese are passed directly into:\n\n- `build_planner_node(...)`\n- `build_reviewer_node(...)`\n- `build_reviser_node(...)`\n\n## Implementation-specific injectable factories\n\n`build_implementation_graph()` additionally accepts:\n\n- `coder_agent_factory`\n- `code_reviewer_agent_factory`\n- `verification_runner`\n- `verification_config`\n\nThese are passed into:\n\n- `build_coder_node(...)`\n- `build_code_reviewer_node(...)`\n- `build_verification_node(...)`\n\nThis makes the implementation graph easy to test and to adapt for alternate execution environments.\n\n---\n\n## Relationship to `api.nodes.*`\n\nThe graphs do not implement task logic themselves. 
They compose node callables returned by builder functions:\n\n- `build_setup_branch_node`\n- `build_planner_node`\n- `build_reviewer_node`\n- `build_reviser_node`\n- `build_coder_node`\n- `build_code_reviewer_node`\n- `build_verification_node`\n\nA useful mental model:\n\n- **nodes** perform work and update state\n- **graphs** decide what runs next based on that state\n\nBecause the routing functions inspect persisted state fields like `state.review`, `state.code_review`, and `state.verification`, node implementations must leave the state in a shape these routers understand.\n\n---\n\n## Relationship to task execution services\n\nThe main integration point is `build_task_graph` in `api/services/execution.py`, which selects and invokes:\n\n- `build_planning_graph()` for planning tasks\n- `build_implementation_graph()` for implementation tasks\n\nThis means the graphs are the executable workflow definitions used by higher-level task lifecycle operations such as:\n\n- running a task graph\n- resuming from checkpoints\n- inspecting recovery/debug state\n\nCheckpoint support is especially important here because startup and recovery paths rely on rebuilding the same graph shape and resuming execution consistently.\n\n---\n\n## Return value\n\nBoth builder functions return the result of:\n\n```python\ngraph.compile(...)\n```\n\nSo callers receive a compiled LangGraph runnable, not the raw `StateGraph`.\n\nCompiled names are:\n\n- `\"planning_loop\"` from `build_planning_graph()`\n- `\"implementation_loop\"` from `build_implementation_graph()`\n\nThese names can be useful in debugging and checkpoint-related tooling.\n\n---\n\n## Design patterns and conventions\n\n## 1. Routing is state-driven and deterministic\n\nThe routing functions contain no side effects. They only inspect state and return a literal route label. This makes them easy to test and reason about.\n\n## 2. 
Missing persisted outputs fall back to retry/revision paths\n\nExamples:\n\n- `state.review is None` → `\"revise\"`\n- `state.code_review is None` → `\"revise\"`\n- `state.verification is None` → `\"code\"`\n\nThis avoids hard failures when a node could not produce a parseable persisted result.\n\n## 3. Iteration limits are enforced at graph boundaries\n\nThe graph, not the node, decides when to stop after too many review/code cycles:\n\n- `state.iteration >= state.max_iterations`\n- `state.code_iteration >= state.max_code_iterations`\n\nThat keeps loop control centralized.\n\n## 4. Verification gates code review\n\nIn the implementation workflow, code review only happens after verification says the result is reviewable:\n\n- passed\n- or baseline-only failures\n\nOtherwise the workflow returns to coding.\n\n---\n\n## Practical reading guide\n\nWhen modifying this module, read it in this order:\n\n1. `build_planning_graph()` / `build_implementation_graph()` to understand node topology\n2. the corresponding routing functions to understand loop termination and retries\n3. the state models (`PlanningState`, `ImplementationState`) to confirm which fields routers depend on\n4. the node builders in `api.nodes.*` to verify they populate those fields correctly\n\nIf you change a node’s output shape or status values, update the routing functions accordingly.\n\n---\n\n## Extension points\n\nCommon ways to extend this module safely:\n\n### Add a new node to a workflow\n\nInsert a new `graph.add_node(...)` and wire it with `add_edge(...)` or `add_conditional_edges(...)`. 
Prefer adding a dedicated routing function if branching depends on state.\n\n### Change approval/retry semantics\n\nUpdate the relevant router:\n\n- planning-only review behavior → `route_after_review()`\n- implementation planning review behavior → `route_after_plan_review()`\n- verification behavior → `route_after_verification()`\n- code review behavior → `route_after_code_review()`\n\n### Swap agent implementations in tests or alternate deployments\n\nPass custom factories into the graph builder rather than modifying node builder internals.\n\n### Enable resumable execution\n\nProvide an `AsyncPostgresSaver` via `checkpointer`.\n\n---\n\n## Exported API\n\n## `build_planning_graph(...)`\n\nBuilds and compiles the plan-only workflow using `PlanningState`.\n\n## `route_after_review(state: PlanningState) -> Literal[\"approved\", \"rejected\", \"max_iterations\", \"revise\"]`\n\nRoutes after plan review in the planning-only workflow.\n\n## `build_implementation_graph(...)`\n\nBuilds and compiles the implementation workflow using `ImplementationState`.\n\n## `route_after_plan_review(state: ImplementationState) -> Literal[\"code\", \"rejected\", \"max_iterations\", \"revise\"]`\n\nRoutes after plan review in the implementation workflow.\n\n## `route_after_verification(state: ImplementationState) -> Literal[\"code\", \"code_review\", \"terminal_failure\"]`\n\nRoutes after deterministic verification.\n\n## `route_after_code_review(state: ImplementationState) -> Literal[\"approved\", \"rejected\", \"max_iterations\", \"revise\"]`\n\nRoutes after persisted code review.\n\n---\n\n## Summary\n\n`api.graphs` is the workflow-definition layer for task execution.\n\n- `planning.py` defines a compact plan/review/revise loop.\n- `implementation.py` builds on that pattern and adds coding, verification, and code review.\n- Routing is explicit, deterministic, and driven by persisted state.\n- Node builders do the work; graphs control sequencing and termination.\n- The module is 
designed for checkpointed execution, dependency injection, and testability.","task-orchestration-and-workflow-execution-nodes":"# Task Orchestration and Workflow Execution — nodes\n\n# Task Orchestration and Workflow Execution — nodes\n\nThe `api.nodes` package contains the executable units used by the task workflow graphs. Each node is built by a `build_*_node(...)` factory that binds runtime dependencies—settings, git access, agent configuration, artifact persistence, and verification services—and returns an async callable that consumes workflow state and produces a partial state update.\n\nThese nodes implement the core execution loop for:\n\n- task branch setup\n- planning\n- plan review and revision\n- implementation\n- deterministic verification\n- code review and revision\n\nAt a high level, the package turns workflow state models such as `PlanningState`, `ImplementationState`, and `SetupBranchState` into concrete side effects on a task-specific git clone plus structured state transitions for the graph layer.\n\n## Package surface\n\n`api/nodes/__init__.py` re-exports the public node builders and selected related types/config:\n\n- `build_planner_node`\n- `build_reviewer_node`\n- `build_reviser_node`\n- `build_setup_branch_node`\n- `build_verification_node`\n- `SetupBranchState`\n- `PLANNER_AGENT_CONFIG`\n- `REVIEWER_AGENT_CONFIG`\n- `REVISER_AGENT_CONFIG`\n\nNotably, `coder.py` and `code_reviewer.py` are part of the implementation loop but are not re-exported from `__init__.py`.\n\n---\n\n## Design pattern used across nodes\n\nMost modules follow the same structure:\n\n1. `build_*_node(...)` resolves dependencies once.\n2. The builder returns an async inner function such as `planner_node(...)`.\n3. 
The inner function:\n   - logs scoped execution context\n   - resolves the task clone path\n   - creates or injects an agent/runner\n   - reads or writes files inside the clone\n   - performs git operations\n   - persists artifacts\n   - returns a dict of state updates for the graph\n\nThis pattern keeps graph construction simple while making nodes easy to test by dependency injection.\n\n### Common injected dependencies\n\nAcross the builders you will see optional injection points for:\n\n- `git_client: GitClient | None`\n- `agent_factory: ... | None`\n- `agent_registry: AgentRegistry | None`\n- `tool_registry: ToolRegistry | None`\n- `artifact_executor`\n- `verification_runner`\n- `verification_config`\n\nIf omitted, the node creates default implementations such as `GitClient()` or `VerificationRunner()`.\n\n---\n\n## Execution flow\n\nThe nodes are intended to be composed by graph builders in `api/graphs/*` and orchestrated by services in `api/services/execution.py`.\n\n```mermaid\nflowchart TD\n    A[setup_branch] --> B[planner]\n    B --> C[reviewer]\n    C -->|NEEDS_REVISION| D[reviser]\n    D --> C\n    C -->|APPROVED| E[coder]\n    E --> F[verification]\n    F --> G[code_reviewer]\n    G -->|NEEDS_REVISION| E\n    G -->|APPROVED| H[done]\n```\n\nThis diagram omits the retry path from failed verification back to the coder, as well as terminal branches like rejection, max-iteration exits, and terminal verification errors, but it captures the main loop structure.\n\n---\n\n## Shared execution-scope safeguards\n\nTwo public helpers in `api/nodes/execution_scope.py`, together with one internal helper, enforce filesystem isolation for task execution.\n\n### `get_task_clone_path(clone_root: Path, task_id: str) -> Path`\n\nBuilds the clone path as `<clone_root>/<task_id>` and verifies that the resolved path remains under `clone_root`.\n\nUsed by multiple nodes when `state.clone_path` is not already set.\n\n### `resolve_clone_write_path(clone_path: Path, workspace_root: Path, relative_path: str | Path) -> Path`\n\nResolves a path relative to the task clone and rejects:\n\n- 
path traversal outside the clone\n- writes into the shared `workspace_root`\n\nThis is the main protection used before writing plan files.\n\n### `_ensure_within_root(path: Path, root: Path, label: str)`\n\nInternal helper that raises `ExecutionScopeError` if a resolved path escapes its allowed root.\n\n### Why this matters\n\nThe nodes frequently accept paths from workflow state, especially `plan_path`. Without these checks, a malformed or hostile path could escape the task clone and overwrite unrelated files.\n\n`get_task_clone_path` is also used outside the graph execution path—for example, by task-reading services—so these safeguards are part of the broader task isolation model.\n\n---\n\n## `setup_branch.py`\n\n`setup_branch.py` prepares the task-specific clone and branch before any agent work begins.\n\n### `SetupBranchState`\n\nA frozen Pydantic model containing the minimum state needed for setup:\n\n- `task_id`\n- `branch_name`\n- `plan_path`\n- `context: TaskContext`\n\n`context.base_branch` is especially important because it influences repository-context discovery and summary metadata.\n\n### `build_setup_branch_node(...)`\n\nReturns `setup_branch_node(state: SetupBranchState)`.\n\n#### What `setup_branch_node` does\n\n1. Computes the task clone path with `get_task_clone_path`.\n2. Clones `settings.target_repo` into that path.\n3. Determines whether the task branch already exists:\n   - if not, creates it with `create_branch`\n   - if yes, checks out from `origin/<branch_name>` and marks the setup mode as reused\n4. Resolves and creates the parent directory for `state.plan_path`.\n5. Resolves a trusted snapshot ref with `resolve_trusted_snapshot_ref(state.context.base_branch)`.\n6. Discovers repository context with `discover_repository_context(...)`.\n7. Persists a setup artifact via `persist_result_artifact(...)`.\n8. 
Returns:\n   - `\"clone_path\"`\n   - `\"repo_context\"`\n\n#### Branch reuse behavior\n\nThe node distinguishes between:\n\n- **created**: a new task branch is prepared\n- **reused**: an existing remote branch is checked out for continued work\n\nThat distinction is persisted in artifact metadata and reflected in `_build_setup_branch_summary(...)`.\n\n#### Returned state update\n\n```python\n{\n    \"clone_path\": str(clone_path),\n    \"repo_context\": repo_context,\n}\n```\n\nThis output is foundational for later nodes, which rely on the clone path and repository context.\n\n---\n\n## `planner.py`\n\nThe planner node creates the initial markdown implementation plan and commits it to the task branch.\n\n### `build_planner_node(...)`\n\nReturns `planner_node(state: PlanningState)`.\n\nThe builder resolves the `\"planner\"` agent config using `resolve_registered_agent_config(\"planner\", ...)`.\n\n### Planner execution\n\n`planner_node` performs the following:\n\n1. Resolves the clone path.\n2. Creates the planner agent with `create_agent(...)` unless a custom `agent_factory` is supplied.\n3. Builds the prompt with `_build_planner_prompt(state)`.\n4. Calls `invoke_agent(...)`.\n5. Extracts plain text with `extract_text(...)`.\n6. Rejects empty output with `AgentError`.\n7. Resolves `state.plan_path` safely with `resolve_clone_write_path(...)`.\n8. Writes the plan markdown to disk.\n9. Commits all changes with `client.commit_all(...)`.\n10. Persists a planning artifact with `_persist_planner_summary_artifact(...)`.\n11. 
Returns `{\"plan\": plan_content}`.\n\n### Prompt construction\n\n`_build_planner_prompt(state)` includes:\n\n- task ID and description\n- base branch\n- relevant files\n- plan output path\n- extra task context\n- GitHub issue/PR context via `format_github_context_for_prompt(...)`\n\nThe planner is instructed to return only markdown plan content.\n\n### Artifact summary generation\n\n`_persist_planner_summary_artifact(...)` stores a concise summary built by `_build_planner_summary(...)`, which tries to extract a meaningful first line from the plan using `_extract_plan_summary_line(...)`.\n\nThis keeps artifacts readable without storing the entire plan as the summary string.\n\n### Important behavior\n\nThe planner node is responsible for creating the first committed plan revision. That commit becomes the baseline for later planning review.\n\n---\n\n## `reviewer.py`\n\nThe reviewer node evaluates the current plan and returns structured review feedback.\n\n### `build_reviewer_node(...)`\n\nReturns `reviewer_node(state: PlanningState)`.\n\nThe builder resolves the `\"reviewer\"` agent config and configures the agent to return `ReviewFeedback`.\n\n### Reviewer execution\n\n`reviewer_node`:\n\n1. Computes `review_iteration = state.iteration + 1`.\n2. Resolves the clone path.\n3. Creates the reviewer agent with `response_format=ReviewFeedback`.\n4. Reads the current plan file from `state.plan_path`.\n5. Captures:\n   - `reviewed_head_commit` via `get_head_commit`\n   - `baseline_commit` via `get_ref_commit(..., \"HEAD~1\")`\n6. Loads the plan diff with `_load_plan_diff(...)`.\n7. Builds the prompt with `_build_reviewer_prompt(...)`.\n8. Invokes the agent.\n9. Parses structured output with `_extract_review_feedback(...)`.\n10. Persists a `review` artifact with `create_logged_artifact(...)`.\n11. Computes workflow status with `_get_state_status(...)`.\n12. 
Returns updated planning state fields.\n\n### Returned state update\n\n```python\n{\n    \"review\": review,\n    \"iteration\": review_iteration,\n    \"status\": next_status,\n    \"result\": result_text,\n}\n```\n\n### Review status mapping\n\n`_get_state_status(...)` maps `ReviewFeedback.status` into graph-level status:\n\n- `\"APPROVED\"` → `\"approved\"`\n- `\"REJECTED\"` → `\"rejected\"`\n- otherwise, if iteration limit reached → `\"max_iterations\"`\n- else → `\"in_progress\"`\n\n`_build_result_text(...)` returns `review.feedback` only for terminal outcomes.\n\n### Diff handling\n\n`_load_plan_diff(...)` requests a git diff for `state.plan_path` between the previous commit and current HEAD. For the first committed plan revision, the prompt falls back to a message indicating that this is the first committed revision.\n\n### Parse fallback\n\nIf the LLM output cannot be parsed into `ReviewFeedback`, `_extract_review_feedback(...)` logs an `LLMParseError` and falls back to:\n\n```python\nReviewFeedback(\n    status=\"NEEDS_REVISION\",\n    risk_level=\"medium\",\n    feedback=\"Review could not be parsed; treating as needs revision.\",\n    suggestions=[],\n)\n```\n\nThis is intentionally conservative: unparseable review output does not block the workflow with an exception, but it also does not silently approve the plan.\n\n---\n\n## `reviser.py`\n\nThe reviser node updates the plan in response to reviewer feedback and commits the revision.\n\n### `build_reviser_node(...)`\n\nReturns `reviser_node(state: PlanningState)`.\n\nThe builder resolves the `\"reviser\"` agent config.\n\n### Reviser execution\n\n`reviser_node`:\n\n1. Computes `revision_number = state.iteration + 1`.\n2. Requires `state.review`; otherwise raises `AgentError`.\n3. Resolves the clone path.\n4. Creates the reviser agent.\n5. Reads the current plan file.\n6. Builds a revision prompt with `_build_reviser_prompt(...)`.\n7. Invokes the agent and extracts text.\n8. 
Rejects empty revised plans with `AgentError`.\n9. Overwrites the plan file.\n10. Commits the revision with:\n    - subject from `_build_revision_subject(...)`\n    - body from `_build_revision_body(...)`\n11. Returns:\n    ```python\n    {\n        \"plan\": revised_plan,\n        \"status\": \"in_progress\",\n        \"result\": None,\n    }\n    ```\n\n### Prompt construction\n\nThe reviser prompt includes:\n\n- task metadata\n- reviewer status and risk level\n- reviewer feedback\n- reviewer suggestions\n- current plan markdown\n- GitHub issue/PR context\n\nThe agent is instructed to return the full revised markdown plan only.\n\n### Commit message strategy\n\n- `_build_revision_subject(...)` derives a concise subject from reviewer feedback.\n- `_build_revision_body(...)` records addressed reviewer suggestions in a structured list.\n\nThis makes plan revision history readable and auditable in git.\n\n---\n\n## `coder.py`\n\nThe coder node performs implementation work in the task clone and ensures the result is committed in a service-controlled way.\n\n### `build_coder_node(...)`\n\nReturns `coder_node(state: ImplementationState)`.\n\nThe builder resolves the `\"coder\"` agent config.\n\n### Attempt limiting\n\nBefore doing any work, `coder_node` checks:\n\n```python\nif state.code_attempt >= state.max_code_attempts:\n```\n\nIf true, it returns terminal state immediately:\n\n```python\n{\n    \"status\": \"max_iterations\",\n    \"result\": _build_max_attempt_result(state),\n}\n```\n\nThis is the implementation loop’s hard stop.\n\n### Coder execution\n\nIf attempts remain, `coder_node`:\n\n1. Increments `code_attempt`.\n2. Resolves the clone path.\n3. Creates the coder agent.\n4. Reads the approved plan file.\n5. Captures `previous_head_commit`.\n6. Builds the prompt with `_build_coder_prompt(...)`.\n7. Invokes the agent.\n8. Extracts a plain-text implementation summary.\n9. Rejects empty summaries with `AgentError`.\n10. 
Calls `_ensure_coder_changes_are_committed(...)`.\n11. Persists a code summary artifact with `_persist_coder_summary_artifact(...)`.\n12. Returns:\n    ```python\n    {\n        \"code_attempt\": code_attempt,\n        \"status\": \"in_progress\",\n        \"result\": None,\n    }\n    ```\n\n### Prompt construction\n\n`_build_coder_prompt(...)` combines several context blocks:\n\n- task metadata\n- relevant files\n- extra task context via `_format_extra_context(...)`\n- approved plan markdown\n- revision context via `_build_revision_context(...)`\n- verification context via `_build_verification_context(...)`\n- GitHub issue/PR context\n\nThe prompt explicitly tells the agent:\n\n- use filesystem, shell, and git capabilities\n- treat relevant files as hints, not a hard boundary\n- **do not run `git commit`**\n- leave changes in the working tree\n- return a short summary\n\n### Revision-aware coding\n\n`_build_revision_context(state)` changes behavior depending on whether this is the first implementation pass or a revision after code review.\n\nIf `state.code_review` exists, the prompt includes:\n\n- last reviewed commit\n- reviewer status\n- risk level\n- feedback\n- suggestions\n\nThis lets the coder act as a revision agent without needing a separate node.\n\n### Verification-aware coding\n\n`_build_verification_context(state)` adds guidance when the latest deterministic verification did not pass.\n\nIt distinguishes among:\n\n- `\"failed\"`\n- `\"error\"`\n- `\"timeout\"`\n\nand includes:\n\n- verified commit\n- command\n- summary\n- optional output excerpt\n\nThis gives the coder concrete failure context for the next attempt.\n\n### Service-owned commit enforcement\n\n`_ensure_coder_changes_are_committed(...)` is one of the most important behaviors in this package.\n\nIt enforces these rules:\n\n1. If the agent already changed HEAD, raise `AgentError`:\n   - agents must not create commits directly\n2. 
If there are no working tree changes, return the unchanged HEAD\n3. If there are changes, create a fallback commit with `git_client.commit_all(...)`\n4. Return the new HEAD commit\n\nThis ensures commit creation remains under service control even if the agent only edits files.\n\n### Fallback commit messages\n\n- `_build_fallback_commit_subject(...)` derives a concise subject from the coder summary\n- `_build_fallback_commit_body(...)` records addressed code-review items when revising\n\nExamples of subject patterns:\n\n- first pass: `code: implement ...`\n- revision pass: `code: revision N - ...`\n\n### Max-attempt result text\n\n`_build_max_attempt_result(state)` explains why the implementation loop stopped, preferring the most actionable context:\n\n1. verification failure summary\n2. code review feedback\n3. generic max-attempt message\n\n---\n\n## `verification.py`\n\nThe verification node runs deterministic checks against the current implementation commit and records the result.\n\n### `build_verification_node(...)`\n\nReturns `verification_node(state: ImplementationState)`.\n\nThe builder initializes:\n\n- `GitClient`\n- `VerificationRunner`\n- `VerificationConfig`\n\nunless custom implementations are injected.\n\n### Verification execution\n\n`verification_node`:\n\n1. Resolves the clone path.\n2. Reads current `head_commit`.\n3. Computes baseline failures with `_resolve_verification_baseline_failures(...)`.\n4. Runs deterministic verification with `runner.run(...)`.\n5. Marks baseline-only failures with `_mark_baseline_only_failures(...)`.\n6. Persists a `verification` artifact with `_persist_verification_artifact(...)`.\n7. 
Returns a state update containing:\n   - `\"verification\"`\n   - `\"verification_baseline_failures\"`\n   - `\"status\"`\n   - `\"result\"`\n   - optionally `\"last_verified_commit\"` when verification passed\n\nIf `result.status == \"terminal_error\"`, the node sets `\"status\": \"terminal_error\"`.\n\n### Baseline failure capture\n\n`_resolve_verification_baseline_failures(...)` exists to avoid treating pre-existing test failures as regressions.\n\nIt only attempts baseline capture when:\n\n- no baseline failures are already stored\n- `state.code_attempt == 1`\n- the verification command looks like pytest (`_is_pytest_command(...)`)\n\nWhen enabled, it:\n\n1. resolves `HEAD~1` as the baseline ref\n2. creates a detached worktree at that ref\n3. runs verification there with a quieter config from `_build_baseline_capture_config(...)`\n4. collects `baseline_result.failing_tests`\n5. removes the temporary worktree\n\nIf any part of baseline capture fails, the node logs a warning and proceeds without a baseline.\n\n### Baseline-only failure marking\n\n`_mark_baseline_only_failures(...)` updates a failed `VerificationResult` when all current failing tests are already present in the baseline set.\n\nIt sets:\n\n- `baseline_only_failures = True`\n- a summary explaining that only pre-existing failures were found\n\nThis result is later used by the code reviewer prompt and artifact metadata.\n\n### Artifact persistence\n\n`_persist_verification_artifact(...)` stores:\n\n- status\n- command\n- exit code\n- verified head commit\n- duration\n- output excerpt\n- failing tests\n- baseline-only-failures flag\n\nThis artifact is the durable record of deterministic verification for each implementation attempt.\n\n---\n\n## `code_reviewer.py`\n\nThe code reviewer node evaluates implementation changes against the approved plan, repository context, and verification evidence.\n\n### `build_code_reviewer_node(...)`\n\nReturns `code_reviewer_node(state: ImplementationState)`.\n\nThe 
builder resolves the `\"code_reviewer\"` agent config and configures the agent for `ReviewFeedback`.\n\n### Code review execution\n\n`code_reviewer_node`:\n\n1. Computes `review_iteration = state.code_iteration + 1`.\n2. Resolves the clone path.\n3. Creates the code reviewer agent.\n4. Reads the approved plan file.\n5. Captures `reviewed_head_commit`.\n6. Resolves the baseline commit with `_resolve_baseline_commit(...)`:\n   - use `state.last_reviewed_commit` if present\n   - otherwise use the base branch ref\n7. Computes a git diff between baseline and reviewed HEAD, optionally scoped to `state.context.relevant_files`\n8. Builds the prompt with `_build_code_reviewer_prompt(...)`\n9. Invokes the agent\n10. Parses structured output with `_extract_review_feedback(...)`\n11. Persists a `code_review` artifact with verification metadata included\n12. Computes graph status with `_get_state_status(...)`\n13. Returns updated implementation state\n\n### Returned state update\n\n```python\n{\n    \"code_review\": review,\n    \"code_iteration\": review_iteration,\n    \"last_reviewed_commit\": reviewed_head_commit,\n    \"status\": next_status,\n    \"result\": result_text,\n}\n```\n\n### Prompt construction\n\n`_build_code_reviewer_prompt(...)` includes:\n\n- task metadata\n- base branch\n- relevant files\n- plan path\n- baseline commit\n- reviewed HEAD commit\n- approved plan markdown\n- latest code diff\n- verification evidence from `_build_verification_evidence_section(...)`\n- GitHub issue/PR context\n\nThe prompt explicitly tells the reviewer to use verification evidence to confirm that:\n\n- the reviewed HEAD matches the commit that passed deterministic verification, or\n- remaining failures only match the pre-implementation baseline\n\nThis is the key integration point between deterministic verification and LLM-based code review.\n\n### Verification evidence section\n\n`_build_verification_evidence_section(...)` includes, when available:\n\n- verification status\n- 
verified commit\n- reviewed HEAD commit\n- command\n- summary\n- baseline-only-failures note\n- output excerpt\n\n### Artifact metadata enrichment\n\n`_build_verification_metadata(state)` adds verification-related fields to the code review artifact:\n\n- `verification_status`\n- `verified_head_commit`\n- `verification_attempt`\n- `verification_baseline_only_failures`\n\nThis makes code review artifacts self-describing without requiring a join against verification artifacts.\n\n### Parse fallback\n\nLike the planning reviewer, `_extract_review_feedback(...)` catches parse failures and falls back to a conservative `NEEDS_REVISION` review.\n\n### Status mapping\n\n`_get_state_status(...)` mirrors the planning reviewer logic:\n\n- approved\n- rejected\n- max iterations\n- in progress\n\n---\n\n## Agent creation and configuration\n\nSeveral nodes use the same agent creation pattern:\n\n- resolve config with `resolve_registered_agent_config(...)`\n- create an agent with `create_agent(...)`\n- invoke it with `invoke_agent(...)`\n\n### Agent config names used\n\n- `planner`\n- `reviewer`\n- `reviser`\n- `coder`\n- `code_reviewer`\n\nThese names must exist in the active `AgentRegistry`.\n\n### Tool registry integration\n\nWhen the default `create_agent(...)` path is used, nodes pass `tool_registry=tool_registry`. 
This allows agent capabilities to be centrally configured and validated.\n\nThe execution-flow data shows that graph construction can fail early if required tools are missing from the registry.\n\n### Testability\n\nEach node defines a small `Protocol` for its agent factory, such as:\n\n- `PlannerAgentFactory`\n- `ReviewerAgentFactory`\n- `ReviserAgentFactory`\n- `CoderAgentFactory`\n- `CodeReviewerAgentFactory`\n\nThis makes it straightforward to inject fake agents in tests without coupling tests to the concrete agent implementation.\n\n---\n\n## Logging and failure behavior\n\nEvery node wraps execution in `logger.scoped_context(...)` and logs:\n\n- `\"Node started.\"`\n- `\"Node finished.\"`\n- `\"Node failed.\"` on exceptions\n\nExceptions are generally not swallowed. The one deliberate exemption is the parse fallback in the reviewer nodes, where malformed LLM output is converted into conservative review feedback instead of aborting the workflow.\n\nThis gives the orchestration layer a consistent failure model:\n\n- operational failures raise\n- unparseable review output degrades into revision-required state\n\n---\n\n## Artifact strategy\n\nThe nodes persist two kinds of execution records:\n\n### `persist_result_artifact(...)`\n\nUsed for concise result summaries, such as:\n\n- setup branch summary\n- planner summary\n- coder summary\n\n### `create_logged_artifact(...)`\n\nUsed when richer typed artifact records are needed, such as:\n\n- planning review\n- verification\n- code review\n\nThese artifacts typically include:\n\n- task ID\n- version/attempt\n- agent name\n- summary or feedback content\n- structured metadata such as commits, statuses, suggestions, and verification details\n\nThis artifact trail is important for observability, debugging, and recovery.\n\n---\n\n## State contracts and graph integration\n\nThe nodes do not own the full workflow state; they return partial updates. 
The graph layer is responsible for merging those updates into the current state object and routing to the next node based on fields like `status`.\n\n### Planning loop state expectations\n\nPlanning nodes operate on `PlanningState` and commonly read:\n\n- `task_id`\n- `task_description`\n- `context`\n- `plan_path`\n- `iteration`\n- `max_iterations`\n- `review`\n- `repo_context`\n- `clone_path`\n- GitHub context fields\n\nThey return updates such as:\n\n- `plan`\n- `review`\n- `iteration`\n- `status`\n- `result`\n\n### Implementation loop state expectations\n\nImplementation nodes operate on `ImplementationState` and commonly read:\n\n- `task_id`\n- `task_description`\n- `branch_name`\n- `context`\n- `plan_path`\n- `code_attempt`\n- `max_code_attempts`\n- `code_iteration`\n- `max_code_iterations`\n- `code_review`\n- `verification`\n- `last_reviewed_commit`\n- `repo_context`\n- `clone_path`\n- GitHub context fields\n\nThey return updates such as:\n\n- `code_attempt`\n- `verification`\n- `verification_baseline_failures`\n- `last_verified_commit`\n- `code_review`\n- `code_iteration`\n- `last_reviewed_commit`\n- `status`\n- `result`\n\n---\n\n## Subtle but important implementation details\n\n### Plan writes are always scope-checked\n\nBoth planner and reviser use `resolve_clone_write_path(...)` before touching `state.plan_path`. Setup does the same when preparing the directory.\n\n### Planner and reviser commit immediately\n\nThe planning loop always commits plan changes before review. This gives the reviewer a stable git diff and commit anchors for artifacts.\n\n### Coder agents are not allowed to commit\n\nThe service enforces commit ownership. If the agent commits directly, `_ensure_coder_changes_are_committed(...)` raises an error.\n\n### Verification baseline capture is intentionally narrow\n\nBaseline failure capture only runs on the first implementation attempt and only for pytest-like commands. 
This keeps the feature useful without making verification expensive or overly generic.\n\n### Review parse failures do not halt the workflow\n\nBoth reviewer nodes degrade to `NEEDS_REVISION`. This avoids brittle orchestration when the LLM returns malformed structured output.\n\n### Code review compares against the last reviewed commit when available\n\nThis keeps later review iterations focused on incremental changes rather than repeatedly diffing against the base branch.\n\n---\n\n## Contributing guidance\n\nWhen modifying or adding nodes in this package, keep these conventions intact:\n\n### Preserve the builder pattern\n\nNew nodes should follow the existing `build_*_node(...) -> async callable` shape so they remain easy to compose into graphs and easy to test.\n\n### Keep state updates minimal and explicit\n\nReturn only the fields the graph needs to merge. Avoid mutating state objects in place.\n\n### Use execution-scope helpers for filesystem access\n\nAny path derived from workflow state should go through `get_task_clone_path(...)` or `resolve_clone_write_path(...)` as appropriate.\n\n### Prefer service-owned git operations\n\nIf an agent can modify files, the node should decide when and how commits are created.\n\n### Persist artifacts for externally meaningful steps\n\nIf a node produces a durable milestone, review decision, or verification result, record it through the observability layer.\n\n### Be conservative with LLM output parsing\n\nIf structured output is required, provide a safe fallback path where possible.\n\n### Include commit anchors in artifacts when relevant\n\nPlanner, coder, reviewer, and verification flows all benefit from commit-linked observability.\n\n---\n\n## Module-by-module reference\n\n### `api.nodes.__init__`\n\nPublic package exports for graph construction.\n\n### `api.nodes.execution_scope`\n\nFilesystem isolation helpers for task clones and writable paths.\n\n### `api.nodes.setup_branch`\n\nInitial clone/branch preparation 
and repository-context discovery.\n\n### `api.nodes.planner`\n\nInitial plan generation, file write, commit, and planning artifact persistence.\n\n### `api.nodes.reviewer`\n\nStructured review of the current plan, with git diff context and artifact logging.\n\n### `api.nodes.reviser`\n\nPlan revision based on reviewer feedback, followed by a git commit.\n\n### `api.nodes.coder`\n\nImplementation execution, revision-aware prompting, service-owned commit enforcement, and code summary artifacts.\n\n### `api.nodes.verification`\n\nDeterministic verification execution, baseline-failure detection, and verification artifact logging.\n\n### `api.nodes.code_reviewer`\n\nStructured review of implementation changes using plan, diff, and verification evidence.\n\n---\n\n## Relationship to the rest of the codebase\n\nThis package sits at the boundary between orchestration and execution:\n\n- **Graphs** decide ordering and transitions.\n- **Nodes** perform the actual work.\n- **Agents** generate plans, revisions, and reviews.\n- **GitClient** provides repository mutation and inspection.\n- **Observability** persists artifacts.\n- **VerificationRunner** executes deterministic checks.\n- **Task/repository context** enriches prompts and constrains execution.\n\nIn practice:\n\n- `api/services/execution.py` builds and runs graphs\n- `api/graphs/planning.py` and related graph modules compose these node callables\n- `api/agents/*` supplies agent configs and invocation helpers\n- `api/models/tasks.py` defines the state contracts the nodes consume and update\n\nThe result is a workflow system where graph logic stays declarative, while node modules encapsulate the operational details of each execution step.","task-orchestration-and-workflow-execution-services":"# Task Orchestration and Workflow Execution — services\n\nThe `api.services` package is the service-layer boundary for task lifecycle management and graph execution.\n\nIt 
exposes:\n\n- `TaskService` — the main orchestration class for creating, running, recovering, and inspecting tasks\n- `create_task_service()` — a convenience factory for building the process-wide service instance\n\n```python\nfrom api.services.tasks import TaskService, create_task_service\n```\n\n## What this module is responsible for\n\nThis layer sits between:\n\n- API/routes and operators\n- persistence repositories\n- LangGraph-based workflow graphs\n- Git/GitHub side effects\n- artifact and observability persistence\n\nIts responsibilities are:\n\n- validate and persist task submissions\n- enforce the current single-worker execution model\n- run task graphs behind a checkpoint boundary\n- recover interrupted tasks from checkpoints\n- translate graph terminal state into durable `TaskUpdate` records\n- reconcile GitHub publication state after approval or restart\n- expose enriched task/debug views for operators\n- create follow-up tasks for blocked PR reviews\n\nThe service layer does **not** implement planning or implementation logic itself. 
That logic lives in the graphs built by:\n\n- `build_planning_graph()`\n- `build_implementation_graph()`\n\nThe service layer wraps those graphs with lifecycle, persistence, recovery, and publication behavior.\n\n---\n\n## Package structure\n\n### `api/services/__init__.py`\n\nRe-exports the public service API:\n\n- `TaskService`\n- `create_task_service`\n\n### `api/services/execution.py`\n\nDefines the graph execution boundary and recovery/finalization helpers:\n\n- graph construction via `build_task_graph()`\n- graph invocation via `run_task_graph()`\n- checkpoint inspection via `inspect_task_recovery()` and `inspect_task_debug_snapshot()`\n- terminal graph result translation via `build_terminal_task_update()`\n\n### `api/services/tasks.py`\n\nDefines `TaskService`, which owns:\n\n- task creation\n- background scheduling\n- execution lifecycle\n- startup recovery\n- publication reconciliation\n- follow-up task creation\n- task/debug/artifact retrieval\n- failure/publication summary persistence\n\n---\n\n## High-level execution flow\n\n```mermaid\nflowchart TD\n    A[TaskService.create_task] --> B[create_task_record]\n    B --> C[_schedule_run]\n    C --> D[TaskService.run_task]\n    D --> E[run_task_graph]\n    E --> F[LangGraph ainvoke]\n    F --> G[build_terminal_task_update]\n    G --> H[_finalize_task]\n    H --> I[update_task + artifacts + status updates]\n```\n\nThis is the core path for a normal task:\n\n1. A task request is validated and persisted.\n2. The service schedules background execution.\n3. `run_task()` invokes the checkpointed graph.\n4. The graph returns terminal state.\n5. `build_terminal_task_update()` converts graph output into a `TaskUpdate`.\n6. 
`_finalize_task()` persists the terminal task state and related artifacts.\n\n---\n\n## Core types in `execution.py`\n\n## `TaskGraph`\n\nA protocol representing the compiled graph interface expected by the service layer:\n\n```python\nclass TaskGraph(Protocol):\n    async def ainvoke(\n        self,\n        input: dict[str, object] | None,\n        config: dict[str, object] | None = None,\n    ) -> dict[str, object]: ...\n```\n\nThe service only assumes `ainvoke()` for execution.\n\n## `RecoverableTaskGraph`\n\nExtends `TaskGraph` with checkpoint inspection support:\n\n```python\nclass RecoverableTaskGraph(TaskGraph, Protocol):\n    async def aget_state(\n        self,\n        config: dict[str, object],\n        *,\n        subgraphs: bool = False,\n    ) -> StateSnapshot: ...\n```\n\nRecovery and debug inspection depend on `aget_state()`.\n\n## `RecoveryDecision`\n\nRepresents what startup recovery should do for a running task:\n\n- `action=\"restart\"` — no meaningful checkpoint state exists\n- `action=\"resume\"` — checkpoint has resumable next nodes\n- `action=\"finalize\"` — checkpoint is terminal and should be converted into a task update\n- `action=\"fail\"` — checkpoint state is inconsistent or not safely resumable\n\nOptional fields:\n\n- `graph_result`\n- `error`\n\n## `DebugSnapshot`\n\nBest-effort operator-facing snapshot assembled from:\n\n- checkpoint state\n- clone presence\n- git head commit resolution\n\nFields include:\n\n- `current_node`\n- `next_nodes`\n- `graph_status`\n- `clone_present`\n- `clone_path`\n- `head_commit`\n- `head_source`\n- `checkpoint_available`\n- `checkpoint_created_at`\n- `checkpoint_step`\n- `checkpoint_error`\n\n## `FailureSummaryContext`\n\nStructured context used to persist a terminal failure summary artifact and publish failure status updates.\n\nImportant fields:\n\n- `loop_stage`\n- `failure_kind`\n- `summary`\n- optional `head_commit`\n- optional publication metadata\n\n## 
`TerminalTaskFinalization`\n\nBundles:\n\n- `update: TaskUpdate`\n- optional `failure_summary: FailureSummaryContext`\n\nThis is the output of `build_terminal_task_update()`.\n\n---\n\n## Graph construction and invocation\n\n## `build_task_graph()`\n\n```python\ndef build_task_graph(\n    checkpointer: AsyncPostgresSaver,\n    settings: Settings,\n    task_type: TaskType,\n    artifact_executor: asyncpg.Connection | asyncpg.Pool | None = None,\n    *,\n    agent_registry: AgentRegistry | None = None,\n    tool_registry: ToolRegistry | None = None,\n) -> TaskGraph:\n```\n\nBuilds the correct compiled graph for the task type.\n\nBehavior:\n\n- requires `artifact_executor`; otherwise raises `AgentError`\n- dispatches by `task_type`\n  - `\"plan\"` → `build_planning_graph(...)`\n  - `\"implement\"` → `build_implementation_graph(...)`\n- rejects unsupported task types\n\nThis function is the default graph builder used by the service unless a custom `graph_builder` is injected.\n\n## `run_task_graph()`\n\n```python\nasync def run_task_graph(\n    task: TaskRecord,\n    settings: Settings,\n    *,\n    artifact_executor: asyncpg.Connection | asyncpg.Pool | None = None,\n    graph_builder: Callable[[AsyncPostgresSaver], TaskGraph] | None = None,\n    resume_from_checkpoint: bool = False,\n    git_client: GitClient | None = None,\n    github_client: GitHubClient | None = None,\n    agent_registry: AgentRegistry | None = None,\n    tool_registry: ToolRegistry | None = None,\n) -> dict[str, object]:\n```\n\nThis is the execution wrapper around the LangGraph checkpoint boundary.\n\n### What it does\n\n1. Validates that `task.plan_path` exists.\n2. Normalizes task context with `normalize_task_context()`.\n3. Opens an `AsyncPostgresSaver` using `settings.database_url`.\n4. Calls `checkpointer.setup()`.\n5. Builds the graph using either:\n   - injected `graph_builder(checkpointer)`, or\n   - `build_task_graph(...)`\n6. 
Decides graph input:\n   - if `resume_from_checkpoint=True`, passes `None`\n   - otherwise builds a fresh input payload\n7. Invokes `graph.ainvoke(...)` with:\n   - input payload\n   - `config={\"configurable\": {\"thread_id\": task.thread_id}}`\n\n### Graph input payload\n\nFor fresh execution, the payload includes:\n\n- `task_id`\n- `type`\n- `description`\n- `task_description`\n- `context`\n- `branch_name`\n- `plan_path`\n- `github_issue`\n- `github_pr`\n\nGitHub issue/PR context is loaded by `_load_graph_github_context()` when:\n\n- `task.github_repo` is set\n- at least one of `task.github_issue_number` or `task.github_pr_number` is set\n- `settings.gh_token` is configured\n\n### Why `thread_id` matters\n\nCheckpoint lookup and resume behavior are keyed by:\n\n```python\nconfig={\"configurable\": {\"thread_id\": task.thread_id}}\n```\n\nIn this service, `thread_id` is set to `task_id` when the task is created. That makes checkpoint identity stable across restarts.\n\n---\n\n## Recovery and debug inspection\n\n## `inspect_task_recovery()`\n\n```python\nasync def inspect_task_recovery(...)\n```\n\nDetermines how a task in `running` state should be handled during startup recovery.\n\n### Decision logic\n\nAfter loading the graph and calling `aget_state(...)`:\n\n- **No values and no next nodes** → `restart`\n- **Has next nodes**\n  - if clone exists → `resume`\n  - if clone is missing but `_can_resume_without_clone(next_nodes)` is true → `resume`\n  - otherwise → `fail`\n- **No next nodes, terminal graph status present**\n  - if `status` is in `TERMINAL_GRAPH_STATUSES` → `finalize`\n- otherwise → `fail`\n\n### Clone-sensitive resume behavior\n\n`_can_resume_without_clone()` currently allows resume without a clone only when all next nodes are `\"setup_branch\"`.\n\nThat is an important durability rule: if the graph may depend on unpushed local git state, the service refuses to resume when the clone is gone.\n\n## `inspect_task_debug_snapshot()`\n\n```python\nasync 
def inspect_task_debug_snapshot(...)\n```\n\nBuilds a non-mutating operator snapshot.\n\nIt combines:\n\n- clone existence from `get_task_clone_path(...)`\n- head commit resolution from either:\n  - clone (`git_client.get_head_commit`)\n  - target repo branch (`git_client.get_ref_commit`)\n- checkpoint inspection via `aget_state(...)`\n\nThis method is intentionally defensive:\n\n- if checkpoint inspection fails, it captures the exception string in `checkpoint_error`\n- if the graph does not support `aget_state()`, it reports that instead of failing the request\n\n### Current node resolution\n\n`_resolve_current_node(snapshot)` prefers:\n\n1. the first entry in `snapshot.next`\n2. otherwise the first named task in `snapshot.tasks`\n\n### Graph status resolution\n\n`_resolve_graph_status(...)` returns:\n\n- explicit `values[\"status\"]` if present\n- `\"in_progress\"` if a checkpoint exists and `next_nodes` is non-empty\n- otherwise `None`\n\n---\n\n## Translating graph results into task updates\n\n## Terminal statuses\n\n`execution.py` treats these graph statuses as terminal:\n\n```python\nTERMINAL_GRAPH_STATUSES = {\"approved\", \"rejected\", \"max_iterations\", \"terminal_error\"}\n```\n\n## `build_terminal_task_update()`\n\n```python\nasync def build_terminal_task_update(\n    task: TaskRecord,\n    graph_result: Mapping[str, object],\n    *,\n    settings: Settings,\n    git_client: GitClient | None = None,\n    artifact_executor: asyncpg.Connection | asyncpg.Pool | None = None,\n    github_client: GitHubClient | None = None,\n    publication_config: PublicationConfig | None = None,\n) -> TerminalTaskFinalization:\n```\n\nThis function is the bridge between graph semantics and task-table lifecycle semantics.\n\n### Behavior by graph outcome\n\n#### `status == \"approved\"`\n\nDelegates to `_build_approved_task_update()`.\n\nThis is the most complex path because approval is not enough by itself; the service still needs to ensure publication durability and 
optional GitHub publication.\n\n#### `status in {\"rejected\", \"max_iterations\", \"terminal_error\"}`\n\nReturns:\n\n- `TaskUpdate(status=\"failed\", result_type=graph_outcome, result=result_text, error=None)`\n- a `FailureSummaryContext` built by `_build_graph_failure_summary_context(...)`\n\nThis preserves the graph’s terminal reason in `result_type` while mapping the task lifecycle to `failed`.\n\n---\n\n## Approved-task finalization and publication\n\n## `_build_approved_task_update()`\n\nThis function turns an approved graph result into a successful task update only if branch durability and publication requirements are satisfied.\n\n### Steps\n\n1. Resolve clone path with `get_task_clone_path(...)`\n2. Load `reviewed_head_commit` via `find_latest_reviewed_head_commit(...)`\n3. Verify the authoritative branch already points at that commit using `_branch_matches_commit(...)`\n4. If not durable:\n   - require the clone to exist\n   - push the branch with `git_client.push(...)`\n5. Re-check durability\n6. If publication checkpoint persistence is available and task publication is pending:\n   - call `persist_publication_checkpoint(..., publication_status=\"published\")`\n7. Optionally publish to GitHub with `publish_task_to_github(...)`\n8. Return `TaskUpdate(status=\"succeeded\", result_type=\"approved\", ...)`\n\n### Why `reviewed_head_commit` is required\n\nApproval is only considered publishable if the service can prove which reviewed commit was approved. If `reviewed_head_commit` is missing, the task is converted into a failed publication finalization.\n\n### Failure modes during approved finalization\n\nFailures here do **not** mean the graph failed. 
They mean post-approval publication guarantees could not be satisfied.\n\n`_build_failed_publication_finalization()` returns:\n\n- `TaskUpdate(status=\"failed\", result_type=\"approved\", publication_status=\"failed\", ...)`\n- `FailureSummaryContext(loop_stage=\"publication\", ...)`\n\nFailure kinds include:\n\n- `missing_reviewed_head_commit`\n- `missing_clone`\n- `push_failed`\n- `durability_check_failed`\n\n### GitHub publication\n\nIf all of the following are true:\n\n- `task.github_repo is not None`\n- `settings.gh_token is not None`\n- `artifact_executor is not None`\n\nthen `_build_approved_task_update()` opens a GitHub client via `_get_github_client()` and calls `publish_task_to_github(...)`.\n\nThat function is responsible for PR creation/update and publication-state transitions such as:\n\n- `published`\n- `awaiting_human_review`\n- `blocked`\n- `merged`\n\nThe returned publication fields are copied into the final `TaskUpdate`.\n\n---\n\n## Failure summary generation\n\n## `format_failure_summary_text()`\n\n```python\ndef format_failure_summary_text(stage_label: str, detail: str) -> str:\n```\n\nBuilds a compact one-line summary such as:\n\n- `Task failed during execution: ...`\n- `Task failed during publication: ...`\n\nIt normalizes whitespace and truncates long details via `_normalize_failure_detail()`.\n\n## `_build_graph_failure_summary_context()`\n\nMaps graph terminal failures into a structured `FailureSummaryContext`.\n\n### Special handling for `terminal_error`\n\nIf `graph_result[\"verification\"]` exists and itself reports `status == \"terminal_error\"`, the failure is attributed to the `verification` loop stage and may include `verified_head_commit`.\n\nOtherwise it is treated as a graph execution failure with loop stage `execution`.\n\n### Stage inference for rejection/max-iteration outcomes\n\n`_infer_graph_failure_loop_stage()` uses task type and graph result shape to infer whether the failure belongs to:\n\n- `planning`\n- 
`implementation`\n\nSignals include presence of:\n\n- `verification`\n- `code_review`\n- `code_attempt`\n- `last_reviewed_commit`\n\nThis matters because the summary text differs:\n\n- planning rejection → `\"Review rejected the task plan\"`\n- implementation rejection → `\"Code review rejected the implementation\"`\n- planning max iterations → planning review exhausted\n- implementation max iterations → implementation review exhausted\n\n---\n\n## `TaskService`\n\n`TaskService` in `api/services/tasks.py` is the main orchestration boundary used by routes and startup hooks.\n\n## Construction\n\n```python\nTaskService(\n    db_pool,\n    settings,\n    graph_builder=None,\n    git_client=None,\n    github_client=None,\n    agent_registry=None,\n    tool_registry=None,\n    skill_catalog=None,\n    memory_write_policy=None,\n)\n```\n\n### Dependencies\n\n- `db_pool`: asyncpg pool for task/artifact persistence\n- `settings`: runtime configuration\n- optional injected graph builder, git/GitHub clients, registries, skill catalog, and memory write policy for testing or customization\n\n### Initialization behavior\n\nIf dependencies are not provided, the service builds defaults:\n\n- `GitClient()`\n- `build_default_agent_registry()`\n- `build_default_tool_registry()`\n- `build_default_skill_catalog(settings.workspace_root)`\n- `MemoryWritePolicy()`\n\nIt also calls:\n\n```python\nsync_skill_tool_registrations(self._tool_registry, self._skill_catalog)\n```\n\nThis keeps tool registrations aligned with discovered skills.\n\n### Internal state\n\n- `_active_tasks: dict[str, asyncio.Task[None]]`\n- `_submission_lock: asyncio.Lock()`\n\nThe lock serializes submission and follow-up creation so the single-worker constraint is enforced consistently.\n\n---\n\n## Task creation\n\n## `create_task()`\n\n```python\nasync def create_task(self, request: TaskRequest) -> TaskRecord:\n```\n\nCreates a new task and schedules background execution.\n\n### Behavior\n\nInside `_submission_lock` and a DB transaction:\n\n1. 
Calls `find_running_task(connection)`\n2. If a running task exists, raises `WorkerBusyError`\n3. Generates a new task ID via `_generate_prefixed_id(\"task\")`\n4. Normalizes request context with `normalize_task_context()`\n5. Builds `TaskCreate` via `_build_task_create(...)`\n6. Persists it with `create_task_record(...)`\n\nAfter commit, it calls `_schedule_run(task.task_id)`.\n\n### Single-worker model\n\nThe service currently supports only one concurrent running task. The user-facing detail string is:\n\n```python\nWORKER_BUSY_DETAIL = \"A task is already running. Phase 01 supports one concurrent task.\"\n```\n\n## `_build_task_create()`\n\nBuilds the persisted task payload.\n\nDefaults:\n\n- `branch_name = f\"darkfactory/{task_id}\"`\n- `plan_path = f\".darkfactory/plans/{task_id}.md\"`\n- `thread_id = task_id`\n\nIt also resolves GitHub linkage and follow-up lineage.\n\n### Follow-up task behavior\n\nIf `request.follow_up_to_task_id` is set:\n\n- `request.reuse_branch` is required\n- the referenced task is loaded\n- `reuse_branch` must match the previous task’s `branch_name`\n- `origin_task_id` is inherited from the original chain root\n- `follow_up_to_task_id` is set to the immediate predecessor\n- GitHub linkage is inherited/validated via `_resolve_follow_up_github_linkage()`\n\nThis preserves branch continuity and task-chain lineage.\n\n---\n\n## Follow-up tasks for blocked PR reviews\n\n## `create_follow_up_task()`\n\n```python\nasync def create_follow_up_task(\n    *,\n    blocked_task_id: str,\n    description: str,\n    triggering_review: Review | None = None,\n) -> TaskRecord:\n```\n\nCreates or reuses the active follow-up task for a blocked PR review.\n\n### Key behaviors\n\n- resolves the latest task chain context with `_resolve_follow_up_creation_context()`\n- resolves the triggering review with `_resolve_triggering_review()`\n- builds an idempotency key with `_build_follow_up_trigger_key(...)`\n- checks `find_active_follow_up_by_trigger_key(...)`\n- 
enforces the same single-worker constraint as `create_task()`\n- creates a new task that:\n  - reuses the existing branch\n  - links back to the prior task\n  - inherits GitHub linkage\n  - carries review-aware context\n\n### Idempotency\n\nThe trigger key format is:\n\n```python\ngh_pr_review:{repo}:{pr_number}:{review_id}\n```\n\nThis prevents duplicate follow-up tasks for the same blocking review.\n\n## `create_blocked_pr_follow_up()`\n\nRefreshes publication state first, then delegates to `create_follow_up_task()`.\n\nUse this when the caller wants follow-up creation based on the latest GitHub review state rather than a pre-supplied `Review`.\n\n## Review-aware follow-up context\n\n`_build_follow_up_task_context()` enriches the new task context with:\n\n- `review_request_summary`\n- prioritized `relevant_files`\n\nIt uses:\n\n- `TaskContext.model_validate(...)`\n- `_summarize_review_for_context(review)`\n- `_list_follow_up_changed_files(...)`\n- `_prioritize_changed_files_for_review(...)`\n\n### File prioritization heuristics\n\n`_prioritize_changed_files_for_review()` combines:\n\n1. explicit file hints extracted from review text\n2. changed files mentioned by full path or basename in the review text\n3. all changed files\n\nThen it deduplicates and truncates to `FOLLOW_UP_RELEVANT_FILE_LIMIT`.\n\nSupporting helpers:\n\n- `_sanitize_review_text_for_file_hints()`\n- `_extract_review_file_hints()`\n- `_normalize_review_file_hint()`\n- `_looks_like_repo_relative_file_path()`\n- `_looks_like_file_basename()`\n- `_dedupe_preserving_order()`\n\nThese helpers are intentionally conservative to avoid treating arbitrary review text as file paths.\n\n---\n\n## Running tasks\n\n## `run_task()`\n\n```python\nasync def run_task(\n    self,\n    task_id: str,\n    *,\n    resume_from_checkpoint: bool = False,\n) -> None:\n```\n\nExecutes a task through the checkpointed graph wrapper.\n\n### Execution sequence\n\n1. Load the task with `get_task(...)`\n2. 
Refresh the skill catalog:\n   - `self._skill_catalog.refresh(...)`\n   - `sync_skill_tool_registrations(...)`\n3. Log start and publish a start status update comment\n4. Call `run_task_graph(...)`\n5. Convert graph output with `build_terminal_task_update(...)`\n6. Finalize via `_finalize_task(...)`\n7. On exceptions:\n   - log the exception\n   - build a failure summary with `_build_failure_summary_context(...)`\n   - finalize as `TaskUpdate(status=\"failed\", error=str(exc))`\n8. In `finally`, clean up the clone if appropriate\n\n### Cancellation\n\n`asyncio.CancelledError` is logged and re-raised. The service does not silently convert cancellation into task failure.\n\n---\n\n## Finalization\n\n## `_finalize_task()`\n\n```python\nasync def _finalize_task(\n    self,\n    task_id: str,\n    update: TaskUpdate,\n    *,\n    failure_summary: FailureSummaryContext | None = None,\n) -> TaskRecord:\n```\n\nThis is the durable lifecycle transition from `running` to terminal state.\n\n### Behavior\n\n1. Reload current task\n2. Require `current_task.status == \"running\"`\n   - otherwise raise `InvalidStateTransitionError`\n3. Persist the update with `update_task(...)`\n4. Persist failure summary artifact via `_persist_failure_summary_artifact(...)`\n5. Persist publication summary artifact via `_persist_publication_result_artifact(...)`\n6. Publish failure status update comment if needed\n7. 
Append governed task history via `_append_task_history(...)`\n\nThis method centralizes terminal side effects so both normal execution and recovery use the same finalization path.\n\n## Clone cleanup\n\n`_cleanup_clone()` removes the task clone only for succeeded tasks.\n\nFailed-task clones are intentionally retained:\n\n```python\nlogger.info(f\"Retaining failed-task clone for debugging: {task.task_id}\")\n```\n\nThat makes postmortem inspection possible.\n\n---\n\n## Startup recovery\n\n## `recover_running_tasks()`\n\n```python\nasync def recover_running_tasks(self) -> None:\n```\n\nUsed during startup to reconcile tasks left in `running` state.\n\nFor each running task ID:\n\n1. load the task\n2. call `inspect_task_recovery(...)`\n3. apply the result with `_apply_recovery_decision(...)`\n\n## `_apply_recovery_decision()`\n\nHandles each `RecoveryDecision.action`:\n\n- `restart` → `_schedule_run(task.task_id)`\n- `resume` → `_schedule_run(task.task_id, resume_from_checkpoint=True)`\n- `finalize`\n  - call `build_terminal_task_update(...)`\n  - `_finalize_task(...)`\n  - `_cleanup_clone(...)`\n- `fail`\n  - build recovery failure summary\n  - finalize as failed\n  - cleanup as appropriate\n\nThis keeps startup recovery behavior aligned with normal execution semantics.\n\n---\n\n## Publication recovery and reconciliation\n\nPublication recovery is separate from graph recovery. 
It handles tasks that already succeeded but whose GitHub publication state may be incomplete or stale after restart.\n\n## `recover_publication_tasks()`\n\nScans `list_latest_publication_recovery_candidates(...)` and reconciles tasks that satisfy `_should_recover_publication(task)`.\n\n## `recover_publication_task()`\n\nResolves the latest publication owner for a task chain, then reconciles that task if needed.\n\n## `reconcile_publication()`\n\nOperator-facing alias for `recover_publication_task()`.\n\n## `_recover_publication_task()`\n\nCalls `publish_task_to_github(...)` and persists the returned publication fields with `update_task(...)`.\n\nIt then persists a publication result artifact via `_persist_publication_result_artifact(...)`.\n\n### Which tasks are eligible?\n\n`_should_recover_publication(task)` returns true when:\n\n- `task.status == \"succeeded\"`\n- `task.github_repo is not None`\n- `task.publication_status` is one of:\n  - `\"published\"`\n  - `\"pr_open\"`\n  - `\"awaiting_human_review\"`\n  - `\"blocked\"`\n  - `\"failed\"`\n\nThis allows restart-time reconciliation of partially completed publication workflows.\n\n---\n\n## Task retrieval and debug views\n\n## `get_task()`\n\nSimple pass-through to `api.repositories.tasks.get_task()`.\n\n## `get_task_response()`\n\nReturns a `TaskResponse` enriched with:\n\n- resolved plan content\n- artifact summaries\n\n### Plan resolution\n\n`_resolve_task_plan()` tries, in order:\n\n1. `_read_plan_from_clone(task)`\n2. `_read_plan_from_branch(task)` if `_has_durable_branch_publication(task)`\n\nThis fallback matters after successful publication or clone cleanup. 
If the local clone is gone but the branch is durable, the plan can still be read from the authoritative repo using `GitClient.show_file_at_ref(...)`.\n\n## `get_task_debug_response()`\n\nReturns a `TaskDebugResponse` combining:\n\n- task fields\n- latest artifact IDs\n- latest execution summary artifact metadata\n- `inspect_task_debug_snapshot(...)`\n\nThis is the main operator-facing introspection endpoint for in-flight or failed tasks.\n\n---\n\n## GitHub status update comments\n\nThe service publishes lightweight status updates back to linked GitHub issues/PRs.\n\n## Start updates\n\n`_publish_task_start_status_update()` posts:\n\n```python\nbuild_pre_publication_status_comment(task, milestone=\"started\")\n```\n\n## Failure updates\n\n`_publish_task_failure_status_update()` posts a failure comment only when `task.status == \"failed\"`.\n\nFailure detail is resolved by `_resolve_status_update_failure_detail()` in this order:\n\n1. `failure_summary.summary`\n2. publication failure summary from `task.publication_error`\n3. 
execution failure summary from `task.error`\n\n## Comment persistence\n\n`_upsert_task_status_update_comment()` calls `upsert_status_update_comment(...)` only when `_should_publish_task_status_update(task)` is true.\n\nFailures to post comments are logged and swallowed; they do not fail task execution or finalization.\n\n---\n\n## Artifact persistence\n\n## Failure summary artifacts\n\n`_persist_failure_summary_artifact()` persists a result artifact when:\n\n- `task.status == \"failed\"`\n- a `FailureSummaryContext` is available\n\nArtifact characteristics:\n\n- `node_name=\"service\"`\n- `loop_stage=failure_summary.loop_stage`\n- metadata includes:\n  - `failure_kind`\n  - optional `head_commit`\n  - optional `publication_status`\n  - optional `github_pr_number`\n\nIf no failure summary is provided, the service logs a warning and skips artifact creation.\n\n## Publication summary artifacts\n\n`_persist_publication_result_artifact()` persists a publication summary artifact when `_should_persist_publication_summary(task)` is true:\n\n- task succeeded\n- `publication_error is None`\n- `publication_status` is in `PUBLICATION_SUMMARY_STATUSES`\n\nIt avoids duplicate artifacts by comparing the latest publication artifact with current:\n\n- `publication_status`\n- `github_pr_number`\n- `head_commit`\n\nSummary text is generated by `_build_publication_summary()`.\n\nExamples of publication summaries include:\n\n- durable branch published\n- awaiting human review\n- blocked by requested changes\n- merged\n\n---\n\n## GitHub client handling\n\nBoth `execution.py` and `tasks.py` define a private `_get_github_client()` async context manager with the same pattern:\n\n- use injected `github_client` if provided\n- otherwise, if `settings.gh_token` is absent, yield `None`\n- otherwise create `AsyncGitHubClient(settings.gh_token, base_url=settings.gh_base_url)`\n\nThis keeps GitHub access optional and testable.\n\n---\n\n## Important invariants and design choices\n\n## 1. 
Single in-process worker\n\nTask submission is serialized and guarded by `find_running_task(...)`.\n\nThis is not just an in-memory limit; it is checked against persisted task state inside a transaction.\n\n## 2. Checkpoint identity is stable\n\n`thread_id` is persisted as `task_id`, and all graph execution/recovery uses that same thread ID.\n\n## 3. Resume is clone-aware\n\nRecovery refuses to resume from checkpoints that may depend on missing local git state, except for the safe `\"setup_branch\"` case.\n\n## 4. Approval is not enough\n\nAn `\"approved\"` graph result still must pass publication durability checks before the task becomes `succeeded`.\n\n## 5. Finalization is centralized\n\nNormal execution and recovery both converge on `_finalize_task()`.\n\n## 6. Failed clones are retained\n\nThis is deliberate for debugging and operator inspection.\n\n## 7. Publication recovery is separate from execution recovery\n\nA task can be graph-complete but still need GitHub reconciliation.\n\n---\n\n## Extension points\n\n## Custom graph builder\n\nBoth `TaskService` and `run_task_graph()` accept `graph_builder`.\n\nExpected signature:\n\n```python\nCallable[[AsyncPostgresSaver], TaskGraph]\n```\n\nUse this in tests or when swapping graph implementations.\n\n## Injected clients and registries\n\nYou can inject:\n\n- `git_client`\n- `github_client`\n- `agent_registry`\n- `tool_registry`\n- `skill_catalog`\n- `memory_write_policy`\n\nThis makes the service layer highly testable and avoids hard-coding infrastructure dependencies.\n\n---\n\n## Helper functions worth knowing\n\n### In `execution.py`\n\n- `format_failure_summary_text()` — compact failure summary formatting\n- `_resolve_current_node()` — derive current node from checkpoint snapshot\n- `_resolve_debug_head_commit()` — resolve head commit from clone or target repo\n- `_extract_graph_head_commit()` — infer relevant commit from graph result\n- `_load_graph_github_context()` — fetch issue/PR context for fresh graph 
input\n\n### In `tasks.py`\n\n- `_resolve_follow_up_github_linkage()` — enforce GitHub linkage consistency across follow-ups\n- `_resolve_latest_publication_owner()` — walk task lineage to find the current publication owner\n- `_find_latest_blocked_review_source()` — find the task in the chain that is actually blocked\n- `_prioritize_changed_files_for_review()` — derive review-focused relevant files\n- `_should_recover_publication()` — gate startup publication reconciliation\n\n---\n\n## Typical integration points with the rest of the codebase\n\n## Repositories\n\nThis module depends heavily on repository functions for durable state:\n\n- task creation and updates from `api.repositories.tasks`\n- artifact lookup from `api.repositories.artifacts`\n\n## Graphs\n\nExecution is delegated to:\n\n- `api.graphs.planning.build_planning_graph`\n- `api.graphs.implementation.build_implementation_graph`\n\n## Git and GitHub\n\nSide effects are performed through:\n\n- `lib.git.GitClient`\n- `lib.github.AsyncGitHubClient` / `GitHubClient`\n- `api.github.publisher.publish_task_to_github`\n- `api.github.context.load_task_github_context`\n\n## Observability and memory\n\nArtifacts and history are persisted through:\n\n- `persist_result_artifact`\n- `find_latest_execution_summary_artifact`\n- `append_task_history_summary`\n\n## Skills and tools\n\nBefore each run, the service refreshes the skill catalog and re-syncs tool registrations:\n\n- `SkillCatalog.refresh(...)`\n- `sync_skill_tool_registrations(...)`\n\nThat means graph execution sees current skill/tool availability at run time, not only at process startup.\n\n---\n\n## Using the module\n\n## Build the service\n\n```python\nservice = create_task_service(db_pool, settings)\n```\n\nor directly:\n\n```python\nservice = TaskService(db_pool, settings)\n```\n\n## Create and run a task\n\n```python\ntask = await service.create_task(request)\n```\n\nExecution is scheduled automatically in the background.\n\n## Inspect a 
task\n\n```python\ntask_response = await service.get_task_response(task.task_id)\ndebug_response = await service.get_task_debug_response(task.task_id)\n```\n\n## Recover on startup\n\n```python\nawait service.recover_running_tasks()\nawait service.recover_publication_tasks()\n```\n\n## Shut down cleanly\n\n```python\nawait service.shutdown()\n```\n\nThis waits for in-flight background tasks, then cancels any still pending after `SHUTDOWN_GRACE_PERIOD_SECONDS`.\n\n---\n\n## Contributor notes\n\nWhen changing this module, keep these boundaries intact:\n\n- graph logic belongs in graph builders/nodes, not in `TaskService`\n- task lifecycle transitions should continue to funnel through `_finalize_task()`\n- recovery decisions should remain conservative around missing clones and git durability\n- publication reconciliation should stay idempotent\n- follow-up task creation must preserve lineage and branch reuse invariants\n\nIf you add new graph terminal states or publication states, update all of the following consistently:\n\n- `TERMINAL_GRAPH_STATUSES`\n- `build_terminal_task_update()`\n- failure summary inference helpers\n- publication summary persistence gates\n- recovery logic that interprets checkpoint terminal state","task-orchestration-and-workflow-execution-verification":"# Task Orchestration and Workflow Execution — verification\n\n# Verification Module\n\nThe `api.verification` package provides the deterministic verification step used by task orchestration after code has been prepared in a task-specific clone. 
Its public surface is intentionally small:\n\n```python\nfrom api.verification import VerificationRunner\n```\n\n`VerificationRunner` executes a configured command inside a clone, captures and normalizes its output, classifies the outcome into a `VerificationStatus`, and returns a `VerificationResult` suitable for downstream workflow decisions.\n\n## Purpose in the workflow\n\nThis module is the execution boundary between orchestration logic and repository-owned verification commands such as test runs. It is responsible for:\n\n- running the configured verification command in the cloned repository\n- ensuring the clone root is importable via `PYTHONPATH`\n- collecting `stdout` and `stderr`\n- stripping ANSI escape sequences from output\n- truncating output to a configured byte limit\n- classifying process outcomes into domain statuses\n- extracting failing pytest test identifiers when possible\n- emitting structured logging around start, completion, timeout, and startup failure\n\nIt does **not** decide whether verification should run, retry, or gate later workflow stages. That logic lives elsewhere; this module only executes and normalizes the verification step.\n\n## Public API\n\n## `VerificationRunner`\n\nDefined in `api/verification/runner.py`.\n\n### `async run(...) 
-> VerificationResult`\n\n```python\nasync def run(\n    self,\n    *,\n    clone_path: Path,\n    head_commit: str,\n    config: VerificationConfig,\n    attempt: int = 1,\n) -> VerificationResult:\n```\n\nRuns `config.command` inside `clone_path` and returns a normalized `VerificationResult`.\n\n### Parameters\n\n- `clone_path: Path`\n  - Filesystem path to the task clone where the command should execute.\n  - Also used to prefix `PYTHONPATH`.\n\n- `head_commit: str`\n  - Commit SHA associated with the code being verified.\n  - Stored in the result as `verified_head_commit` and included in log context.\n\n- `config: VerificationConfig`\n  - Supplies:\n    - `command`\n    - `timeout_seconds`\n    - `max_output_bytes`\n\n- `attempt: int = 1`\n  - Attempt number for logging and result metadata.\n\n### Result fields populated by this module\n\nThe returned `VerificationResult` includes:\n\n- `status`\n- `command`\n- `exit_code`\n- `summary`\n- `output_excerpt`\n- `failing_tests`\n- `verified_head_commit`\n- `duration_seconds`\n- `attempt`\n\nExact field types come from `api.models.verification`.\n\n## Execution flow\n\nThe runner has three main execution paths:\n\n1. **Command startup failure**\n   - `asyncio.create_subprocess_exec(...)` raises `OSError`\n   - result status becomes `\"terminal_error\"`\n\n2. **Command timeout**\n   - `asyncio.wait_for(process.communicate(), timeout=...)` times out\n   - process is killed\n   - result status becomes `\"timeout\"`\n\n3. 
**Command completes**\n   - output is combined and normalized\n   - exit code is mapped to a domain status\n   - summary and failing tests are derived\n\n```mermaid\nflowchart TD\n    A[VerificationRunner.run] --> B[build env + log start]\n    B --> C[create_subprocess_exec]\n    C -->|OSError| D[terminal_error result]\n    C --> E[communicate with timeout]\n    E -->|timeout| F[kill process + timeout result]\n    E -->|completed| G[combine output]\n    G --> H[normalize status]\n    H --> I[build summary/result]\n```\n\n## Detailed behavior\n\n## Process creation\n\n`run()` executes the configured command with:\n\n- `cwd=str(clone_path)`\n- `env=_build_verification_env(clone_path)`\n- `stdout=asyncio.subprocess.PIPE`\n- `stderr=asyncio.subprocess.PIPE`\n\nThe command is taken directly from `config.command`:\n\n```python\ncommand = list(config.command)\n```\n\nThis preserves argument boundaries and avoids shell parsing.\n\n### Startup failures\n\nIf process creation raises `OSError`, the runner treats that as a non-recoverable execution boundary failure:\n\n- status: `\"terminal_error\"`\n- exit code: `None`\n- summary: `\"Verification command could not start: ...\"`\n\nThe exception text is ANSI-stripped and truncated before being stored in `output_excerpt`.\n\n## Timeout handling\n\nThe subprocess is awaited with:\n\n```python\nstdout, stderr = await asyncio.wait_for(\n    process.communicate(),\n    timeout=config.timeout_seconds,\n)\n```\n\nOn timeout:\n\n- `process.kill()` is attempted inside `suppress(ProcessLookupError)`\n- output already produced by the process is still collected\n- combined output is truncated\n- failing pytest tests are extracted if possible\n- status is `\"timeout\"`\n- exit code is `None`\n- summary is `\"Verification timed out after {config.timeout_seconds}s.\"`\n\nThis path preserves partial output, which is useful for diagnosing hangs or slow failures.\n\n## Completed process handling\n\nWhen the process exits normally:\n\n1. 
`stdout` and `stderr` are combined with `_combine_output()`\n2. output is truncated with `_truncate_output()`\n3. status is derived from `_normalize_status()`\n4. failing tests are extracted with `_extract_failing_tests()`\n5. summary is built with `_build_summary()`\n\n## Status normalization\n\n`_normalize_status(command, exit_code)` maps raw process exit codes into domain-level `VerificationStatus`.\n\n### Rules\n\n- `exit_code == 0` → `\"passed\"`\n- pytest command:\n  - `exit_code == 1` → `\"failed\"`\n  - `exit_code in (3, 4)` → `\"terminal_error\"`\n- anything else → `\"error\"`\n\nThis is intentionally conservative. Only known pytest semantics get special handling.\n\n### Why pytest is special\n\nPytest uses distinct exit codes for test failures (`1`) versus internal errors (`3`) and command-line usage errors (`4`). The runner preserves that distinction:\n\n- `\"failed\"` means tests ran and reported failures\n- `\"terminal_error\"` means verification could not meaningfully execute as intended\n- `\"error\"` is the generic fallback for non-pytest commands or unknown nonzero exits\n\n## Pytest command detection\n\n`_is_pytest_command(command)` recognizes pytest in two forms:\n\n- executable basename (the final path component) starts with `pytest`\n  - examples: `pytest`, `/venv/bin/pytest`\n- module invocation:\n  - `python -m pytest`\n  - any command containing adjacent `\"-m\", \"pytest\"`\n\nIf the command is not recognized as pytest, pytest-specific parsing is skipped.\n\n## Output handling\n\n## ANSI stripping\n\nANSI terminal escape sequences are removed by `_strip_ansi()` using `_ANSI_ESCAPE_RE`.\n\nThis is applied to:\n\n- startup exception text\n- decoded `stdout`\n- decoded `stderr`\n\nThe result is stable, plain-text output suitable for logs, summaries, and persisted excerpts.\n\n## Combining stdout and stderr\n\n`_combine_output(stdout=..., stderr=...)`:\n\n- decodes bytes with `errors=\"replace\"`\n- strips ANSI sequences\n- trims surrounding whitespace\n- labels streams when both are 
present\n\nExamples:\n\n### Only stdout\n\n```text\ntest output here\n```\n\n### Only stderr\n\n```text\nerror output here\n```\n\n### Both\n\n```text\nSTDOUT:\ntest output here\n\nSTDERR:\nerror output here\n```\n\nThis formatting matters because `_extract_summary_line()` later scans the excerpt line-by-line and intentionally ignores the marker lines `STDOUT:` and `STDERR:`.\n\n## Output truncation\n\n`_truncate_output(text, max_output_bytes)` enforces a byte-based limit, not a character-based limit.\n\nBehavior:\n\n- if output fits, return it unchanged\n- otherwise append:\n\n```text\n...[truncated]\n```\n\nImplementation details worth knowing:\n\n- truncation is done on UTF-8 encoded bytes\n- decoding uses `errors=\"ignore\"` to avoid returning partial multibyte characters\n- if `max_output_bytes` is smaller than the truncation notice itself, only the fitting prefix of the notice is returned\n\nThis makes the function safe for arbitrary Unicode output and very small limits.\n\n## Summary generation\n\n`_build_summary(...)` produces the human-readable summary stored in `VerificationResult.summary`.\n\n### Summary precedence\n\n1. If status is `\"timeout\"`, return the timeout message\n2. Otherwise, try `_extract_summary_line(output_excerpt)`\n3. 
If no useful line exists, fall back to a status-based default\n\nFallbacks:\n\n- `\"passed\"` → `\"Verification passed.\"`\n- `\"failed\"` → `\"Verification failed with exit code {exit_code}.\"`\n- `\"terminal_error\"` → `\"Verification encountered a terminal error with exit code {exit_code}.\"`\n- otherwise → `\"Verification errored with exit code {exit_code}.\"`\n\n### `_extract_summary_line(output_excerpt)`\n\nThis helper scans the excerpt from bottom to top and returns the last non-empty line that is not one of:\n\n- `STDOUT:`\n- `STDERR:`\n- `...[truncated]`\n\nThis gives summaries that often reflect the most relevant final line from test output without requiring tool-specific parsing.\n\n## Failing test extraction\n\n`_extract_failing_tests(command, output_text)` only runs for pytest commands.\n\nIt searches for lines matching:\n\n```python\nr\"^FAILED\\s+(\\S+)\"\n```\n\nwith `re.MULTILINE`.\n\nReturned values are:\n\n- ordered by first appearance\n- deduplicated via `list(dict.fromkeys(matches))`\n\nThis is a lightweight parser aimed at pytest’s standard failure summary lines, for example:\n\n```text\nFAILED tests/test_example.py::test_case\n```\n\nIf the command is not pytest or output is empty, it returns `[]`.\n\n## Environment setup\n\n## `_build_verification_env(clone_path)`\n\nThe runner inherits the current process environment and prefixes `PYTHONPATH` with the clone root:\n\n```python\nenv = dict(os.environ)\nexisting_pythonpath = env.get(\"PYTHONPATH\")\nenv[\"PYTHONPATH\"] = (\n    f\"{clone_path}{os.pathsep}{existing_pythonpath}\"\n    if existing_pythonpath\n    else str(clone_path)\n)\n```\n\nThis ensures imports resolve against the checked-out task clone first, which is critical when verification runs against modified code that may not exist in the orchestrator’s own import path.\n\n### Important implication\n\nIf verification depends on local package imports, this module makes the clone importable without requiring installation into a 
virtualenv. It does not otherwise isolate or sanitize the environment.\n\n## Logging\n\nThe runner emits structured logs through `lib.logger.logger` and wraps each event in `logger.scoped_context(...)`.\n\n### Start log\n\nBefore subprocess creation:\n\n- `code_attempt`\n- `head_commit`\n- `command`\n\nMessage:\n\n```text\nVerification started.\n```\n\n### Completion log\n\nOn normal completion:\n\n- `code_attempt`\n- `head_commit`\n- `command`\n- `verification_status`\n- `duration_seconds`\n- `exit_code`\n- `failing_test_count`\n\nMessage:\n\n```text\nVerification completed.\n```\n\n### Timeout log\n\nOn timeout:\n\n- `code_attempt`\n- `head_commit`\n- `command`\n- `verification_status=\"timeout\"`\n- `duration_seconds`\n- `failing_test_count`\n\nMessage:\n\n```text\nVerification timed out.\n```\n\n### Startup failure log\n\nIf the command cannot start:\n\n- `code_attempt`\n- `head_commit`\n- `command`\n- `verification_status=\"terminal_error\"`\n- `duration_seconds`\n\nMessage:\n\n```text\nVerification command could not start.\n```\n\n## `_build_verification_log_context(...)`\n\nThis helper centralizes log field construction and keeps logging consistent across all execution paths.\n\nNotable formatting choice:\n\n```python\n\"command\": \" \".join(command)\n```\n\nLogs store the command as a single string, while `VerificationResult.command` preserves the original list form.\n\n## Module structure\n\n## `api/verification/__init__.py`\n\nExports only:\n\n```python\n__all__ = [\"VerificationRunner\"]\n```\n\nThis keeps the package boundary clean and signals that helper functions in `runner.py` are internal implementation details.\n\n## `api/verification/runner.py`\n\nContains:\n\n- `VerificationRunner`\n- status normalization helpers\n- output processing helpers\n- logging context helper\n- environment construction helper\n\n## Integration with the rest of the codebase\n\nThis module sits below orchestration nodes and above verification result models.\n\n### 
Upstream callers\n\nThe call graph shows `VerificationRunner` is instantiated or invoked by:\n\n- `build_verification_node` in `api/nodes/verification.py`\n- verification-focused tests\n- boundary logging tests\n\nThat placement indicates this module is the execution engine used by a higher-level workflow node rather than a standalone service.\n\n### Downstream dependencies\n\n`run()` constructs `VerificationResult` instances from `api.models.verification` and uses:\n\n- `VerificationConfig`\n- `VerificationStatus`\n\nIt also depends on `lib.logger` for structured logging.\n\n## Design constraints and contribution notes\n\n## Deterministic by design\n\nThe package docstring describes this as a “deterministic verification subsystem.” In practice, that means:\n\n- no shell invocation\n- explicit command argument handling\n- normalized output formatting\n- bounded output size\n- explicit timeout behavior\n- stable status mapping\n\nWhen extending this module, preserve those properties.\n\n## Keep parsing lightweight\n\nThe current implementation only includes tool-specific logic for pytest, and even that logic is intentionally narrow:\n\n- command detection\n- exit-code mapping\n- failure summary extraction\n\nIf support for other verification tools is added, prefer similarly minimal parsing rather than deep coupling to tool internals.\n\n## Be careful with status semantics\n\nThe distinction between `\"failed\"`, `\"error\"`, and `\"terminal_error\"` is meaningful:\n\n- `\"failed\"`: verification ran and reported expected failures\n- `\"error\"`: command exited nonzero in a generic or unknown way\n- `\"terminal_error\"`: verification could not start or pytest reported a non-test-execution problem\n\nChanges to `_normalize_status()` can affect orchestration behavior upstream.\n\n## Preserve byte-based truncation\n\n`_truncate_output()` is byte-aware for a reason. 
Replacing it with character-count truncation would make output limits less predictable and could break storage or transport assumptions elsewhere.\n\n## Preserve partial output on timeout\n\nThe timeout path intentionally collects output after killing the process. This is useful operationally and should not be removed unless the surrounding workflow changes significantly.\n\n## Example mental model\n\nA typical verification run looks like this:\n\n1. orchestration prepares a clone and chooses a verification command\n2. `VerificationRunner.run()` starts the command in that clone\n3. the clone root is prepended to `PYTHONPATH`\n4. output is captured and normalized\n5. the process outcome is mapped into a domain status\n6. a `VerificationResult` is returned to the workflow layer\n7. the workflow layer decides what to do next\n\nThat separation keeps this module focused on execution correctness and result normalization rather than orchestration policy.","task-orchestration-and-workflow-execution":"# Task Orchestration and Workflow Execution\n\n# Task Orchestration and Workflow Execution\n\nThe Task Orchestration and Workflow Execution module group is the runtime layer that turns a persisted task into an executable workflow. 
It connects task lifecycle management in `TaskService`, LangGraph orchestration in `api.graphs`, executable workflow steps in `api.nodes`, and deterministic repository verification in `api.verification`.\n\n## Purpose\n\nTogether, these sub-modules handle the end-to-end execution loop for task work:\n\n- create or recover a task through `TaskService`\n- choose and run the appropriate workflow graph\n- execute node-level side effects against a task-specific git clone\n- review and route based on plan, code review, and verification outcomes\n- persist artifacts, status, and debug state for inspection and recovery\n\nThis layer is where high-level task intent becomes concrete branch setup, planning, implementation, verification, and revision cycles.\n\n## How the sub-modules fit together\n\n### [graphs](graphs.md)\n`api.graphs` defines the workflow topology. Its compiled builders—`build_planning_graph()` and `build_implementation_graph()`—assemble node factories from `api.nodes` into executable LangGraph flows. Routing helpers such as `route_after_plan_review()`, `route_after_review()`, `route_after_verification()`, and `route_after_code_review()` decide whether execution advances, loops for revision, or terminates.\n\n### [nodes](nodes.md)\n`api.nodes` provides the executable units used by the graphs. These factories bind runtime dependencies and return async callables that consume `PlanningState`, `ImplementationState`, or setup-related state and emit partial updates. In practice, they perform branch setup, planning, review, coding, verification preparation, and code review against the task clone and artifact store.\n\n### [services](services.md)\n`api.services` is the orchestration boundary exposed to the rest of the application. `TaskService` creates tasks, resolves task context, runs graphs, recovers publication-related tasks, and exposes inspection/debug helpers. 
It is the entry point that coordinates persistence, graph execution, GitHub integration, and status publication.\n\n### [verification](verification.md)\n`api.verification` supplies `VerificationRunner`, the deterministic execution boundary for repository-owned verification commands. Workflow nodes use its normalized `VerificationResult` and status classification to drive graph routing after implementation work has been prepared.\n\n## Key workflows\n\n### Plan-only execution\nFor tasks that stop at planning, `TaskService` invokes the planning graph built by `build_planning_graph()`. That graph typically starts with branch setup and proceeds through planner and reviewer-style nodes, using `route_after_plan_review()` to determine whether the plan is accepted or revised.\n\n### Planning-to-implementation execution\nFor tasks that continue into coding, `TaskService` runs the implementation graph from `build_implementation_graph()`. This graph coordinates:\n\n- setup and planning\n- review-driven transition into implementation\n- coding via nodes such as the coder node\n- deterministic verification through `VerificationRunner`\n- code review and possible revision loops\n\nRouting functions in `api.graphs` are the control points that turn review and verification outcomes into the next graph step.\n\n### Recovery, inspection, and publication-aware execution\nThe service layer also supports operational workflows around execution:\n\n- recovering publication tasks and GitHub linkage\n- resolving task plans from branch state\n- inspecting graph debug snapshots and current graph state\n- publishing failure or status updates back to external systems\n\nThese flows keep execution resumable and observable without changing the graph/node contract.\n\n## Execution boundaries\n\nThis module group sits between:\n\n- API routes and operators\n- persistence repositories\n- git and GitHub integrations\n- artifact and observability storage\n- repository-defined verification 
commands\n\nThat separation is important: `api.services` decides *when* to run work, `api.graphs` decides *how work flows*, `api.nodes` perform *the work itself*, and `api.verification` provides *deterministic pass/fail evidence* used by the workflow.\n\n## Typical control flow\n\n```text\nTaskService\n  → select/build graph\n  → graph invokes node callables\n  → nodes update workflow state and repository artifacts\n  → verification/review results feed routing functions\n  → graph completes, loops, or fails\n  → service persists status, exposes inspection data, and publishes updates\n```\n\n## See also\n\n- [Task Orchestration and Workflow Execution — graphs](graphs.md)\n- [Task Orchestration and Workflow Execution — nodes](nodes.md)\n- [Task Orchestration and Workflow Execution — services](services.md)\n- [Task Orchestration and Workflow Execution — verification](verification.md)"};
 var TREE = [{"name":"Application Bootstrap and API Surface","slug":"application-bootstrap-and-api-surface","files":["api/__init__.py","api/__main__.py","api/app.py","api/routes/__init__.py","api/routes/tasks.py"]},{"name":"Task Orchestration and Workflow Execution","slug":"task-orchestration-and-workflow-execution","files":[],"children":[{"name":"Task Orchestration and Workflow Execution — graphs","slug":"task-orchestration-and-workflow-execution-graphs","files":["api/graphs/__init__.py","api/graphs/implementation.py","api/graphs/planning.py"]},{"name":"Task Orchestration and Workflow Execution — nodes","slug":"task-orchestration-and-workflow-execution-nodes","files":["api/nodes/__init__.py","api/nodes/code_reviewer.py","api/nodes/coder.py","api/nodes/execution_scope.py","api/nodes/planner.py","api/nodes/reviewer.py","api/nodes/reviser.py","api/nodes/setup_branch.py","api/nodes/verification.py"]},{"name":"Task Orchestration and Workflow Execution — services","slug":"task-orchestration-and-workflow-execution-services","files":["api/services/__init__.py","api/services/execution.py","api/services/tasks.py"]},{"name":"Task Orchestration and Workflow Execution — verification","slug":"task-orchestration-and-workflow-execution-verification","files":["api/verification/__init__.py","api/verification/runner.py"]}]},{"name":"Agent Runtime and Tooling Integration","slug":"agent-runtime-and-tooling-integration","files":["api/agents/__init__.py","api/agents/backends.py","api/agents/factory.py","api/agents/invoke.py","api/agents/middleware.py","api/agents/registry.py","api/tools/__init__.py","api/tools/base.py","api/tools/git.py","api/tools/registry.py"]},{"name":"Skills and Extensible Capabilities","slug":"skills-and-extensible-capabilities","files":["api/skills/__init__.py","api/skills/bridge.py","api/skills/discovery.py","api/skills/loader.py","api/skills/models.py","api/skills/requirements.py"]},{"name":"GitHub Integration and 
Publication","slug":"github-integration-and-publication","files":["api/github/comments.py","api/github/context.py","api/github/publisher.py","lib/github/__init__.py","lib/github/client.py","lib/github/errors.py","lib/github/models.py"]},{"name":"Repository Context and Memory Management","slug":"repository-context-and-memory-management","files":["api/context/__init__.py","api/context/discovery.py","api/memory/__init__.py","api/memory/consolidation.py","api/memory/governance.py","api/models/repository_context.py"]},{"name":"Domain Models and Error Contracts","slug":"domain-models-and-error-contracts","files":["api/errors.py","api/models/__init__.py","api/models/agents.py","api/models/artifacts.py","api/models/review.py","api/models/tasks.py","api/models/verification.py","lib/errors.py"]},{"name":"Persistence and Database Layer","slug":"persistence-and-database-layer","files":["alembic/env.py","alembic/versions/20260402_000001_initial_tasks_and_artifacts.py","alembic/versions/20260403_000002_task_github_linkage.py","alembic/versions/20260408_000003_add_verification_artifact_type.py","api/repositories/__init__.py","api/repositories/artifacts.py","api/repositories/database.py","api/repositories/tasks.py"]},{"name":"Git Operations and Workspace Execution","slug":"git-operations-and-workspace-execution","files":["lib/git/__init__.py","lib/git/client.py","lib/git/errors.py","api/observability.py"]},{"name":"Configuration and Logging Infrastructure","slug":"configuration-and-logging-infrastructure","files":["lib/__init__.py","lib/config.py","lib/logger.py"]},{"name":"Other","slug":"other","files":[],"children":[{"name":"Other — AGENTS.md","slug":"other-agents-md","files":["AGENTS.md"]},{"name":"Other — CLAUDE.md","slug":"other-claude-md","files":["CLAUDE.md"]},{"name":"Other — Dockerfile","slug":"other-dockerfile","files":["Dockerfile"]},{"name":"Other — alembic.ini","slug":"other-alembic-ini","files":["alembic.ini"]},{"name":"Other — 
alembic","slug":"other-alembic","files":["alembic/script.py.mako"]},{"name":"Other — docker-compose.yml","slug":"other-docker-compose-yml","files":["docker-compose.yml"]},{"name":"Other — plans","slug":"other-plans","files":["docs/plans/00_cross_phase_invariants.md","docs/plans/01_mvp_implementation.md","docs/plans/02_github_integration.md","docs/plans/02a_execution_verification.md","docs/plans/02b_repository_context_discovery.md","docs/plans/03_agent_extensibility.md","docs/plans/03a_verification_routing.md","docs/plans/03b_execution_observability.md","docs/plans/04_feedback_pipeline.md","docs/plans/05_scaling_infrastructure.md","docs/plans/05a_execution_slicing_and_session_handoffs.md","docs/plans/06_autonomy_and_triage.md","docs/plans/07_advanced_loop_intelligence.md"]},{"name":"Other — pyproject.toml","slug":"other-pyproject-toml","files":["pyproject.toml"]},{"name":"Other — tests","slug":"other-tests","files":["tests/conftest.py","tests/test_agent_backends.py","tests/test_agent_factory.py","tests/test_agent_registry.py","tests/test_app.py","tests/test_boundary_logging.py","tests/test_code_reviewer.py","tests/test_coder.py","tests/test_config.py","tests/test_errors.py","tests/test_execution_github_context.py","tests/test_execution_scope.py","tests/test_git_client.py","tests/test_git_tool.py","tests/test_github_client.py","tests/test_github_comments.py","tests/test_github_context.py","tests/test_github_models.py","tests/test_github_publisher.py","tests/test_github_settings.py","tests/test_implementation_graph.py","tests/test_implementation_task_api.py","tests/test_implementation_verification.py","tests/test_logger.py","tests/test_memory_governance.py","tests/test_observability_helpers.py","tests/test_planner.py","tests/test_planning_graph.py","tests/test_repo_context_prompt.py","tests/test_repositories.py","tests/test_repository_context_discovery.py","tests/test_repository_context_models.py","tests/test_schema.py","tests/test_setup_branch.py","tests/test_skill_ap
p_bootstrap.py","tests/test_skill_bridge.py","tests/test_skill_discovery.py","tests/test_skill_task_service.py","tests/test_task_api.py","tests/test_task_models.py","tests/test_task_service.py","tests/test_tool_extensibility_contracts.py","tests/test_tool_registry.py","tests/test_verification_models.py","tests/test_verification_runner.py"]},{"name":"Other — workspace","slug":"other-workspace","files":["workspace/HEARTBEAT.example.md"]},{"name":"Other — memory","slug":"other-memory","files":["workspace/memory/HISTORY.example.md","workspace/memory/MEMORY.example.md"]},{"name":"Other — prompts","slug":"other-prompts","files":["workspace/prompts/PROMPT.example.md","workspace/prompts/code_reviewer.md","workspace/prompts/coder.md","workspace/prompts/planner.md","workspace/prompts/reviewer.md","workspace/prompts/reviser.md"]},{"name":"Other — skills","slug":"other-skills","files":["workspace/skills/SKILL.example.md"]}]}];
 var META = {"fromCommit":"179dad5ce6aae35f69b5ed0ab3a9de4ee9ebc9f1","generatedAt":"2026-04-12T14:29:50.809Z","model":"gpt-5.4","moduleFiles":{"Application Bootstrap and API Surface":["api/__init__.py","api/__main__.py","api/app.py","api/routes/__init__.py","api/routes/tasks.py"],"Task Orchestration and Workflow Execution":["api/graphs/__init__.py","api/graphs/implementation.py","api/graphs/planning.py","api/nodes/__init__.py","api/nodes/code_reviewer.py","api/nodes/coder.py","api/nodes/execution_scope.py","api/nodes/planner.py","api/nodes/reviewer.py","api/nodes/reviser.py","api/nodes/setup_branch.py","api/nodes/verification.py","api/services/__init__.py","api/services/execution.py","api/services/tasks.py","api/verification/__init__.py","api/verification/runner.py"],"Task Orchestration and Workflow Execution — graphs":["api/graphs/__init__.py","api/graphs/implementation.py","api/graphs/planning.py"],"Task Orchestration and Workflow Execution — nodes":["api/nodes/__init__.py","api/nodes/code_reviewer.py","api/nodes/coder.py","api/nodes/execution_scope.py","api/nodes/planner.py","api/nodes/reviewer.py","api/nodes/reviser.py","api/nodes/setup_branch.py","api/nodes/verification.py"],"Task Orchestration and Workflow Execution — services":["api/services/__init__.py","api/services/execution.py","api/services/tasks.py"],"Task Orchestration and Workflow Execution — verification":["api/verification/__init__.py","api/verification/runner.py"],"Agent Runtime and Tooling Integration":["api/agents/__init__.py","api/agents/backends.py","api/agents/factory.py","api/agents/invoke.py","api/agents/middleware.py","api/agents/registry.py","api/tools/__init__.py","api/tools/base.py","api/tools/git.py","api/tools/registry.py"],"Skills and Extensible Capabilities":["api/skills/__init__.py","api/skills/bridge.py","api/skills/discovery.py","api/skills/loader.py","api/skills/models.py","api/skills/requirements.py"],"GitHub Integration and 
Publication":["api/github/comments.py","api/github/context.py","api/github/publisher.py","lib/github/__init__.py","lib/github/client.py","lib/github/errors.py","lib/github/models.py"],"Repository Context and Memory Management":["api/context/__init__.py","api/context/discovery.py","api/memory/__init__.py","api/memory/consolidation.py","api/memory/governance.py","api/models/repository_context.py"],"Domain Models and Error Contracts":["api/errors.py","api/models/__init__.py","api/models/agents.py","api/models/artifacts.py","api/models/review.py","api/models/tasks.py","api/models/verification.py","lib/errors.py"],"Persistence and Database Layer":["alembic/env.py","alembic/versions/20260402_000001_initial_tasks_and_artifacts.py","alembic/versions/20260403_000002_task_github_linkage.py","alembic/versions/20260408_000003_add_verification_artifact_type.py","api/repositories/__init__.py","api/repositories/artifacts.py","api/repositories/database.py","api/repositories/tasks.py"],"Git Operations and Workspace Execution":["lib/git/__init__.py","lib/git/client.py","lib/git/errors.py","api/observability.py"],"Configuration and Logging 
Infrastructure":["lib/__init__.py","lib/config.py","lib/logger.py"],"Other":["AGENTS.md","CLAUDE.md","Dockerfile","alembic.ini","alembic/script.py.mako","docker-compose.yml","docs/plans/00_cross_phase_invariants.md","docs/plans/01_mvp_implementation.md","docs/plans/02_github_integration.md","docs/plans/02a_execution_verification.md","docs/plans/02b_repository_context_discovery.md","docs/plans/03_agent_extensibility.md","docs/plans/03a_verification_routing.md","docs/plans/03b_execution_observability.md","docs/plans/04_feedback_pipeline.md","docs/plans/05_scaling_infrastructure.md","docs/plans/05a_execution_slicing_and_session_handoffs.md","docs/plans/06_autonomy_and_triage.md","docs/plans/07_advanced_loop_intelligence.md","pyproject.toml","tests/conftest.py","tests/test_agent_backends.py","tests/test_agent_factory.py","tests/test_agent_registry.py","tests/test_app.py","tests/test_boundary_logging.py","tests/test_code_reviewer.py","tests/test_coder.py","tests/test_config.py","tests/test_errors.py","tests/test_execution_github_context.py","tests/test_execution_scope.py","tests/test_git_client.py","tests/test_git_tool.py","tests/test_github_client.py","tests/test_github_comments.py","tests/test_github_context.py","tests/test_github_models.py","tests/test_github_publisher.py","tests/test_github_settings.py","tests/test_implementation_graph.py","tests/test_implementation_task_api.py","tests/test_implementation_verification.py","tests/test_logger.py","tests/test_memory_governance.py","tests/test_observability_helpers.py","tests/test_planner.py","tests/test_planning_graph.py","tests/test_repo_context_prompt.py","tests/test_repositories.py","tests/test_repository_context_discovery.py","tests/test_repository_context_models.py","tests/test_schema.py","tests/test_setup_branch.py","tests/test_skill_app_bootstrap.py","tests/test_skill_bridge.py","tests/test_skill_discovery.py","tests/test_skill_task_service.py","tests/test_task_api.py","tests/test_task_models.py","tests/test_task
_service.py","tests/test_tool_extensibility_contracts.py","tests/test_tool_registry.py","tests/test_verification_models.py","tests/test_verification_runner.py","workspace/HEARTBEAT.example.md","workspace/memory/HISTORY.example.md","workspace/memory/MEMORY.example.md","workspace/prompts/PROMPT.example.md","workspace/prompts/code_reviewer.md","workspace/prompts/coder.md","workspace/prompts/planner.md","workspace/prompts/reviewer.md","workspace/prompts/reviser.md","workspace/skills/SKILL.example.md"],"Other — AGENTS.md":["AGENTS.md"],"Other — CLAUDE.md":["CLAUDE.md"],"Other — Dockerfile":["Dockerfile"],"Other — alembic.ini":["alembic.ini"],"Other — alembic":["alembic/script.py.mako"],"Other — docker-compose.yml":["docker-compose.yml"],"Other — plans":["docs/plans/00_cross_phase_invariants.md","docs/plans/01_mvp_implementation.md","docs/plans/02_github_integration.md","docs/plans/02a_execution_verification.md","docs/plans/02b_repository_context_discovery.md","docs/plans/03_agent_extensibility.md","docs/plans/03a_verification_routing.md","docs/plans/03b_execution_observability.md","docs/plans/04_feedback_pipeline.md","docs/plans/05_scaling_infrastructure.md","docs/plans/05a_execution_slicing_and_session_handoffs.md","docs/plans/06_autonomy_and_triage.md","docs/plans/07_advanced_loop_intelligence.md"],"Other — pyproject.toml":["pyproject.toml"],"Other — 
tests":["tests/conftest.py","tests/test_agent_backends.py","tests/test_agent_factory.py","tests/test_agent_registry.py","tests/test_app.py","tests/test_boundary_logging.py","tests/test_code_reviewer.py","tests/test_coder.py","tests/test_config.py","tests/test_errors.py","tests/test_execution_github_context.py","tests/test_execution_scope.py","tests/test_git_client.py","tests/test_git_tool.py","tests/test_github_client.py","tests/test_github_comments.py","tests/test_github_context.py","tests/test_github_models.py","tests/test_github_publisher.py","tests/test_github_settings.py","tests/test_implementation_graph.py","tests/test_implementation_task_api.py","tests/test_implementation_verification.py","tests/test_logger.py","tests/test_memory_governance.py","tests/test_observability_helpers.py","tests/test_planner.py","tests/test_planning_graph.py","tests/test_repo_context_prompt.py","tests/test_repositories.py","tests/test_repository_context_discovery.py","tests/test_repository_context_models.py","tests/test_schema.py","tests/test_setup_branch.py","tests/test_skill_app_bootstrap.py","tests/test_skill_bridge.py","tests/test_skill_discovery.py","tests/test_skill_task_service.py","tests/test_task_api.py","tests/test_task_models.py","tests/test_task_service.py","tests/test_tool_extensibility_contracts.py","tests/test_tool_registry.py","tests/test_verification_models.py","tests/test_verification_runner.py"],"Other — workspace":["workspace/HEARTBEAT.example.md"],"Other — memory":["workspace/memory/HISTORY.example.md","workspace/memory/MEMORY.example.md"],"Other — prompts":["workspace/prompts/PROMPT.example.md","workspace/prompts/code_reviewer.md","workspace/prompts/coder.md","workspace/prompts/planner.md","workspace/prompts/reviewer.md","workspace/prompts/reviser.md"],"Other — skills":["workspace/skills/SKILL.example.md"]},"moduleTree":[{"name":"Application Bootstrap and API 
Surface","slug":"application-bootstrap-and-api-surface","files":["api/__init__.py","api/__main__.py","api/app.py","api/routes/__init__.py","api/routes/tasks.py"]},{"name":"Task Orchestration and Workflow Execution","slug":"task-orchestration-and-workflow-execution","files":[],"children":[{"name":"Task Orchestration and Workflow Execution — graphs","slug":"task-orchestration-and-workflow-execution-graphs","files":["api/graphs/__init__.py","api/graphs/implementation.py","api/graphs/planning.py"]},{"name":"Task Orchestration and Workflow Execution — nodes","slug":"task-orchestration-and-workflow-execution-nodes","files":["api/nodes/__init__.py","api/nodes/code_reviewer.py","api/nodes/coder.py","api/nodes/execution_scope.py","api/nodes/planner.py","api/nodes/reviewer.py","api/nodes/reviser.py","api/nodes/setup_branch.py","api/nodes/verification.py"]},{"name":"Task Orchestration and Workflow Execution — services","slug":"task-orchestration-and-workflow-execution-services","files":["api/services/__init__.py","api/services/execution.py","api/services/tasks.py"]},{"name":"Task Orchestration and Workflow Execution — verification","slug":"task-orchestration-and-workflow-execution-verification","files":["api/verification/__init__.py","api/verification/runner.py"]}]},{"name":"Agent Runtime and Tooling Integration","slug":"agent-runtime-and-tooling-integration","files":["api/agents/__init__.py","api/agents/backends.py","api/agents/factory.py","api/agents/invoke.py","api/agents/middleware.py","api/agents/registry.py","api/tools/__init__.py","api/tools/base.py","api/tools/git.py","api/tools/registry.py"]},{"name":"Skills and Extensible Capabilities","slug":"skills-and-extensible-capabilities","files":["api/skills/__init__.py","api/skills/bridge.py","api/skills/discovery.py","api/skills/loader.py","api/skills/models.py","api/skills/requirements.py"]},{"name":"GitHub Integration and 
Publication","slug":"github-integration-and-publication","files":["api/github/comments.py","api/github/context.py","api/github/publisher.py","lib/github/__init__.py","lib/github/client.py","lib/github/errors.py","lib/github/models.py"]},{"name":"Repository Context and Memory Management","slug":"repository-context-and-memory-management","files":["api/context/__init__.py","api/context/discovery.py","api/memory/__init__.py","api/memory/consolidation.py","api/memory/governance.py","api/models/repository_context.py"]},{"name":"Domain Models and Error Contracts","slug":"domain-models-and-error-contracts","files":["api/errors.py","api/models/__init__.py","api/models/agents.py","api/models/artifacts.py","api/models/review.py","api/models/tasks.py","api/models/verification.py","lib/errors.py"]},{"name":"Persistence and Database Layer","slug":"persistence-and-database-layer","files":["alembic/env.py","alembic/versions/20260402_000001_initial_tasks_and_artifacts.py","alembic/versions/20260403_000002_task_github_linkage.py","alembic/versions/20260408_000003_add_verification_artifact_type.py","api/repositories/__init__.py","api/repositories/artifacts.py","api/repositories/database.py","api/repositories/tasks.py"]},{"name":"Git Operations and Workspace Execution","slug":"git-operations-and-workspace-execution","files":["lib/git/__init__.py","lib/git/client.py","lib/git/errors.py","api/observability.py"]},{"name":"Configuration and Logging Infrastructure","slug":"configuration-and-logging-infrastructure","files":["lib/__init__.py","lib/config.py","lib/logger.py"]},{"name":"Other","slug":"other","files":[],"children":[{"name":"Other — AGENTS.md","slug":"other-agents-md","files":["AGENTS.md"]},{"name":"Other — CLAUDE.md","slug":"other-claude-md","files":["CLAUDE.md"]},{"name":"Other — Dockerfile","slug":"other-dockerfile","files":["Dockerfile"]},{"name":"Other — alembic.ini","slug":"other-alembic-ini","files":["alembic.ini"]},{"name":"Other — 
alembic","slug":"other-alembic","files":["alembic/script.py.mako"]},{"name":"Other — docker-compose.yml","slug":"other-docker-compose-yml","files":["docker-compose.yml"]},{"name":"Other — plans","slug":"other-plans","files":["docs/plans/00_cross_phase_invariants.md","docs/plans/01_mvp_implementation.md","docs/plans/02_github_integration.md","docs/plans/02a_execution_verification.md","docs/plans/02b_repository_context_discovery.md","docs/plans/03_agent_extensibility.md","docs/plans/03a_verification_routing.md","docs/plans/03b_execution_observability.md","docs/plans/04_feedback_pipeline.md","docs/plans/05_scaling_infrastructure.md","docs/plans/05a_execution_slicing_and_session_handoffs.md","docs/plans/06_autonomy_and_triage.md","docs/plans/07_advanced_loop_intelligence.md"]},{"name":"Other — pyproject.toml","slug":"other-pyproject-toml","files":["pyproject.toml"]},{"name":"Other — tests","slug":"other-tests","files":["tests/conftest.py","tests/test_agent_backends.py","tests/test_agent_factory.py","tests/test_agent_registry.py","tests/test_app.py","tests/test_boundary_logging.py","tests/test_code_reviewer.py","tests/test_coder.py","tests/test_config.py","tests/test_errors.py","tests/test_execution_github_context.py","tests/test_execution_scope.py","tests/test_git_client.py","tests/test_git_tool.py","tests/test_github_client.py","tests/test_github_comments.py","tests/test_github_context.py","tests/test_github_models.py","tests/test_github_publisher.py","tests/test_github_settings.py","tests/test_implementation_graph.py","tests/test_implementation_task_api.py","tests/test_implementation_verification.py","tests/test_logger.py","tests/test_memory_governance.py","tests/test_observability_helpers.py","tests/test_planner.py","tests/test_planning_graph.py","tests/test_repo_context_prompt.py","tests/test_repositories.py","tests/test_repository_context_discovery.py","tests/test_repository_context_models.py","tests/test_schema.py","tests/test_setup_branch.py","tests/test_skill_ap
p_bootstrap.py","tests/test_skill_bridge.py","tests/test_skill_discovery.py","tests/test_skill_task_service.py","tests/test_task_api.py","tests/test_task_models.py","tests/test_task_service.py","tests/test_tool_extensibility_contracts.py","tests/test_tool_registry.py","tests/test_verification_models.py","tests/test_verification_runner.py"]},{"name":"Other — workspace","slug":"other-workspace","files":["workspace/HEARTBEAT.example.md"]},{"name":"Other — memory","slug":"other-memory","files":["workspace/memory/HISTORY.example.md","workspace/memory/MEMORY.example.md"]},{"name":"Other — prompts","slug":"other-prompts","files":["workspace/prompts/PROMPT.example.md","workspace/prompts/code_reviewer.md","workspace/prompts/coder.md","workspace/prompts/planner.md","workspace/prompts/reviewer.md","workspace/prompts/reviser.md"]},{"name":"Other — skills","slug":"other-skills","files":["workspace/skills/SKILL.example.md"]}]}]};

 (function() {
  // Slug of the page currently displayed; 'overview' is the landing page.
  var activePage = 'overview';

  /**
   * Read the page slug from the URL fragment.
   *
   * @param {string} fallback Slug to return when the fragment is empty or
   *   cannot be percent-decoded.
   * @returns {string} The decoded fragment (without the leading '#'), or
   *   `fallback`.
   */
  function pageFromHash(fallback) {
    if (!location.hash || location.hash.length <= 1) {
      return fallback;
    }
    try {
      return decodeURIComponent(location.hash.slice(1));
    } catch (e) {
      // A malformed escape such as "#%E0%A4%A" raises URIError. Falling back
      // keeps startup alive instead of leaving a blank page.
      return fallback;
    }
  }

  document.addEventListener('DOMContentLoaded', function() {
    // NOTE(review): per the Mermaid docs, securityLevel 'loose' permits HTML
    // in diagram text and click callbacks. Confirm the embedded page content
    // is trusted, or switch to 'strict'.
    mermaid.initialize({ startOnLoad: false, theme: 'neutral', securityLevel: 'loose' });
    renderMeta();
    renderNav();
    document.getElementById('menu-toggle').addEventListener('click', function() {
      document.getElementById('sidebar').classList.toggle('open');
    });

    // Keep the view in sync with Back/Forward and hand-edited fragments.
    // navigateTo() updates activePage before it writes location.hash, so the
    // hashchange event caused by our own navigation is ignored here.
    window.addEventListener('hashchange', function() {
      var page = pageFromHash('overview');
      if (page !== activePage) {
        navigateTo(page);
      }
    });

    activePage = pageFromHash(activePage);
    navigateTo(activePage);
  });

  /**
   * Write a one-line build summary into the #meta-info element.
   *
   * The summary is made of whichever of these META fields are set, in this
   * order, joined by a middle dot: the locale-formatted generation date, the
   * model name, and the first eight characters of the source commit.
   * Does nothing when META is falsy.
   */
  function renderMeta() {
    if (!META) { return; }

    var target = document.getElementById('meta-info');
    var summary = [];

    if (META.generatedAt) {
      var generated = new Date(META.generatedAt);
      summary.push(generated.toLocaleDateString());
    }
    if (META.model) { summary.push(META.model); }
    if (META.fromCommit) { summary.push(META.fromCommit.slice(0, 8)); }

    target.textContent = summary.join(' \u00b7 ');
  }

  /**
   * Build the navigation markup inside #nav-tree and wire up click routing.
   *
   * The markup always starts with the "Overview" link; when TREE has entries
   * a "Modules" label and the nested tree from buildNavTree() follow.
   * A single delegated click listener on the container resolves the nearest
   * element (the click target or one of its ancestors) that carries a
   * non-empty data-page value and routes to it through navigateTo().
   */
  function renderNav() {
    var navRoot = document.getElementById('nav-tree');
    var markup = [
      '<div class="nav-section">',
      '<a class="nav-item overview" data-page="overview" href="#overview">Overview</a>',
      '</div>'
    ];

    if (TREE.length > 0) {
      markup.push('<div class="nav-group-label">Modules</div>');
      markup.push(buildNavTree(TREE));
    }
    navRoot.innerHTML = markup.join('');

    navRoot.addEventListener('click', function(evt) {
      // Climb from the clicked element until something declares a page.
      var node = evt.target;
      for (; node; node = node.parentElement) {
        if (node.dataset.page) { break; }
      }
      if (!node) { return; }

      evt.preventDefault();
      navigateTo(node.dataset.page);
    });
  }

  /**
   * Render a list of tree nodes as nested navigation markup.
   *
   * Each node produces a .nav-section wrapper containing one .nav-item link
   * (data-page and href derived from node.slug, label from node.name).
   * Nodes with a non-empty `children` array additionally get a .nav-children
   * container filled by a recursive call.
   *
   * @param {Array} nodes Nodes with `slug`, `name` and optional `children`.
   * @returns {string} Concatenated HTML for all nodes ('' for an empty list).
   */
  function buildNavTree(nodes) {
    var sections = [];
    for (var idx = 0; idx < nodes.length; idx++) {
      var entry = nodes[idx];
      var hasKids = entry.children && entry.children.length > 0;

      var link = '<a class="nav-item" data-page="' + escH(entry.slug) +
        '" href="#' + encodeURIComponent(entry.slug) + '">' +
        escH(entry.name) + '</a>';
      var nested = hasKids
        ? '<div class="nav-children">' + buildNavTree(entry.children) + '</div>'
        : '';

      sections.push('<div class="nav-section">' + link + nested + '</div>');
    }
    return sections.join('');
  }

  /**
   * Escape a value for insertion into HTML text or a quoted attribute.
   *
   * Replaces & < > " and ' with character references. Quotes are included
   * because callers interpolate the result into double-quoted attributes
   * (e.g. data-page="..."), where an unescaped quote would end the value.
   *
   * @param {*} s Value to escape; null and undefined yield ''.
   * @returns {string} The escaped string.
   */
  function escH(s) {
    if (s === null || s === undefined) {
      return '';
    }
    var entities = {
      '&': '&amp;',
      '<': '&lt;',
      '>': '&gt;',
      '"': '&quot;',
      "'": '&#39;'
    };
    return String(s).replace(/[&<>"']/g, function(ch) {
      return entities[ch];
    });
  }

  /**
   * Display the wiki page identified by `page`.
   *
   * Records the page as active, mirrors it into the URL fragment, highlights
   * the matching navigation link, and renders the page's Markdown from PAGES
   * into #content. Relative links ending in ".md" are turned into in-app
   * navigation and fenced mermaid blocks are rendered as diagrams. If the
   * page is unknown a "Page not found" notice is shown instead.
   *
   * @param {string} page Page slug (a key of PAGES).
   */
  function navigateTo(page) {
    activePage = page;
    location.hash = encodeURIComponent(page);

    var items = document.querySelectorAll('.nav-item');
    for (var i = 0; i < items.length; i++) {
      var isCurrent = items[i].dataset.page === page;
      items[i].classList.toggle('active', isCurrent);
      // Expose the highlighted entry to assistive technology as well.
      if (isCurrent) {
        items[i].setAttribute('aria-current', 'page');
      } else {
        items[i].removeAttribute('aria-current');
      }
    }

    var contentEl = document.getElementById('content');
    // Own-property lookup only: `page` can come from the URL, and names such
    // as "constructor" or "toString" would otherwise resolve to inherited
    // Object members and be passed to the Markdown parser.
    var md = Object.prototype.hasOwnProperty.call(PAGES, page) ? PAGES[page] : null;

    if (!md) {
      contentEl.innerHTML = '<div class="empty-state"><h2>Page not found</h2><p>' + escH(page) + '.md does not exist.</p></div>';
      return;
    }

    // NOTE(review): marked does not sanitize its output, so any raw HTML in
    // the page source is injected as-is. Confirm PAGES content is trusted.
    contentEl.innerHTML = marked.parse(md);

    // Route clicks on a rewritten link through navigateTo with its own slug.
    function bindPageLink(link, slug) {
      link.addEventListener('click', function(e) {
        e.preventDefault();
        navigateTo(slug);
      });
    }

    // Rewrite .md links to hash navigation
    var links = contentEl.querySelectorAll('a[href]');
    for (var j = 0; j < links.length; j++) {
      var href = links[j].getAttribute('href');
      if (href && href.endsWith('.md') && href.indexOf('://') === -1) {
        var slug = href.replace(/\.md$/, '');
        links[j].setAttribute('href', '#' + encodeURIComponent(slug));
        bindPageLink(links[j], slug);
      }
    }

    // Convert mermaid code blocks into mermaid divs
    var mermaidBlocks = contentEl.querySelectorAll('pre code.language-mermaid');
    for (var k = 0; k < mermaidBlocks.length; k++) {
      var pre = mermaidBlocks[k].parentElement;
      var div = document.createElement('div');
      div.className = 'mermaid';
      div.textContent = mermaidBlocks[k].textContent;
      pre.parentNode.replaceChild(div, pre);
    }

    // Diagram rendering is best-effort. mermaid.run() is asynchronous, so a
    // diagram that fails to parse rejects the returned promise rather than
    // throwing here; handle both paths so one bad diagram neither breaks
    // navigation nor produces an unhandled rejection.
    try {
      var rendering = mermaid.run({ querySelector: '.mermaid' });
      if (rendering && typeof rendering.catch === 'function') {
        rendering.catch(function(err) {
          console.warn('Mermaid rendering failed:', err);
        });
      }
    } catch (err) {
      console.warn('Mermaid rendering failed:', err);
    }

    window.scrollTo(0, 0);
    document.getElementById('sidebar').classList.remove('open');
  }
 })();

 </script>
 </body>
 </html>
No results found