Anemll · June 9, 2026 22:12
diff --git a/README.md b/README.md
diff --git a/afm-tools.ts b/afm-tools.ts
 /**
  * Pi extension that adapts requests and responses for the local Apple
  * Foundation Models provider (`fm`).
  *
  * Registers three hooks on `pi`:
  * - `before_provider_request`: works on a deep clone of the payload, rewrites the
  *   edit tool schema and prompt guidance, and for the `system` model additionally
  *   compacts the system prompt, tool schemas and tool messages. The estimated size
  *   of the rewritten payload is remembered for the next `message_end`.
  * - `message_end`: for assistant messages, prefixes context-overflow errors with
  *   "Input exceeds the context window: " and fills in estimated usage when the
  *   message carries no usage (or a total of 0).
  * - `tool_call`: for the `system` model, caps the `limit` of `read` calls at
  *   SYSTEM_READ_LINE_LIMIT.
  *
  * @param pi Extension host; only its `on(eventName, handler)` method is used.
  */
 export default function afmTools(pi: any) {
  // Token estimate of the most recently rewritten request payload (0 = none yet).
  let lastPayloadTokens = 0;

  pi.on("before_provider_request", (event: any, ctx: any) => {
    if (ctx.model?.provider !== "fm") return;

    // Clone so the rewrites never touch the caller's payload object.
    const payload = structuredClone(event.payload);
    rewriteEditToolSchema(payload);
    rewriteEditPromptGuidance(payload);
    if (ctx.model?.id === "system") {
      rewriteCompactSystemPrompt(payload);
      rewriteCompactToolSchemas(payload);
      compactLargeToolMessages(payload);
    }
    lastPayloadTokens = estimatePayloadTokens(payload);
    return payload;
  });

  pi.on("message_end", (event: any, ctx: any) => {
    if (ctx.model?.provider !== "fm") return;
    if (event.message?.role !== "assistant") return;

    // Shallow copy: only top-level fields (errorMessage, usage) are replaced below.
    const message = { ...event.message };
    let changed = false;

    if (
      message.stopReason === "error" &&
      typeof message.errorMessage === "string" &&
      /transcript exceeded the model'?s context size/i.test(message.errorMessage)
    ) {
      message.errorMessage = `Input exceeds the context window: ${message.errorMessage}`;
      changed = true;
    }

    // Substitute estimates only when no usage (or a zero total) was reported.
    if (lastPayloadTokens > 0 && (!message.usage || message.usage.totalTokens === 0)) {
      const outputTokens = estimateAssistantOutputTokens(message);
      message.usage = {
        input: lastPayloadTokens,
        output: outputTokens,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: lastPayloadTokens + outputTokens,
        cost: {
          input: 0,
          output: 0,
          cacheRead: 0,
          cacheWrite: 0,
          total: 0,
        },
      };
      changed = true;
    }

    if (changed) return { message };
  });

  pi.on("tool_call", (event: any, ctx: any) => {
    if (ctx.model?.provider !== "fm" || ctx.model?.id !== "system") return;
    if (event.toolName !== "read") return;

    // Without an input object there is nothing to clamp, and assigning to it would throw.
    const input = event.input;
    if (input === null || typeof input !== "object") return;

    // A missing, non-numeric or NaN limit is treated as "no limit" and gets the cap.
    const limit = typeof input.limit === "number" && !Number.isNaN(input.limit) ? input.limit : undefined;
    if (limit === undefined || limit > SYSTEM_READ_LINE_LIMIT) {
      input.limit = SYSTEM_READ_LINE_LIMIT;
    }
  });
 }

 // Largest `limit` left on a `read` tool call for the fm "system" model; the
 // tool_call hook replaces missing or larger limits with this value.
 const SYSTEM_READ_LINE_LIMIT = 80;
 // Per-message character cap applied to every tool message by compactLargeToolMessages.
 const SYSTEM_TOOL_MESSAGE_CHAR_LIMIT = 1800;
 // Combined character budget for all tool messages; while the total is above it,
 // compactLargeToolMessages shrinks messages to the "old" cap, oldest first.
 const SYSTEM_TOTAL_TOOL_CHARS_LIMIT = 2400;
 // Character cap for tool messages other than the four most recent ones, and the
 // size messages are shrunk to when the combined budget is exceeded.
 const SYSTEM_OLD_TOOL_MESSAGE_CHAR_LIMIT = 180;

 /**
  * Replaces the content of every string-valued system/developer message with a
  * short fixed prompt. The first "Current date: ..." and "Current working
  * directory: ..." lines found in the old content are carried over, in that order.
  * Mutates `payload.messages` in place; does nothing when there is no messages array.
  */
 function rewriteCompactSystemPrompt(payload: any) {
  const messages = payload?.messages;
  if (!Array.isArray(messages)) return;

  const fixedLines = [
    "You are a concise coding assistant running inside pi.",
    "Use tools to inspect files, run commands, and make targeted edits.",
    "This on-device Apple Foundation Model has a small 4K context window.",
    "Keep context small: prefer rg, sed -n, python3 -m py_compile, and read offset/limit over full-file reads.",
    "Do not read the same large file repeatedly. Read only the relevant line ranges.",
    "Use edit for one exact replacement; call edit multiple times for separate replacements.",
    "On macOS, use python3 rather than python unless the user asks otherwise.",
  ];
  // Lines worth preserving from the original prompt, in output order.
  const carriedOver = [/Current date: .*/, /Current working directory: .*/];

  for (const entry of messages) {
    const isPromptRole = entry?.role === "system" || entry?.role === "developer";
    if (!isPromptRole || typeof entry.content !== "string") continue;

    const lines = [...fixedLines];
    for (const pattern of carriedOver) {
      const found = entry.content.match(pattern)?.[0];
      if (found) lines.push(found);
    }
    entry.content = lines.join("\n");
  }
 }

 /**
  * Overwrites the description and parameter schema of the `read`, `bash`, `write`
  * and `edit` function tools with terse versions. Tools with any other name, or
  * whose `type` is not "function", are left untouched. Mutates `payload.tools` in
  * place; does nothing when there is no tools array.
  */
 function rewriteCompactToolSchemas(payload: any) {
  const tools = payload?.tools;
  if (!Array.isArray(tools)) return;

  const text = (description: string) => ({ type: "string", description });
  const numeric = (description: string) => ({ type: "number", description });
  const schema = (required: string[], properties: Record<string, unknown>) => ({
    type: "object",
    required,
    properties,
  });

  // Factories (not shared objects) so every tool receives its own fresh schema.
  const compactSpecs = new Map<string, () => { description: string; parameters: unknown }>([
    [
      "read",
      () => ({
        description: "Read a file. Use offset and limit for small chunks.",
        parameters: schema(["path"], {
          path: text("File path."),
          offset: numeric("Start line, 1-indexed."),
          limit: numeric("Max lines."),
        }),
      }),
    ],
    [
      "bash",
      () => ({
        description: "Run a shell command.",
        parameters: schema(["command"], {
          command: text("Command to run."),
          timeout: numeric("Timeout in seconds."),
        }),
      }),
    ],
    [
      "write",
      () => ({
        description: "Create or overwrite a file.",
        parameters: schema(["path", "content"], {
          path: text("File path."),
          content: text("File content."),
        }),
      }),
    ],
    [
      "edit",
      () => ({
        description: "Replace one exact text block in one file.",
        parameters: schema(["path", "oldText", "newText"], {
          path: text("File path."),
          oldText: text("Exact text to replace."),
          newText: text("Replacement text."),
        }),
      }),
    ],
  ]);

  tools.forEach((tool: any) => {
    const fn = tool?.function;
    if (tool?.type !== "function" || !fn?.name) return;

    const makeSpec = compactSpecs.get(fn.name);
    if (!makeSpec) return;

    const spec = makeSpec();
    fn.description = spec.description;
    fn.parameters = spec.parameters;
  });
 }

 /**
  * Shrinks tool-role messages in `payload.messages` in three passes:
  *  1. every tool message except the four most recent is cut to
  *     SYSTEM_OLD_TOOL_MESSAGE_CHAR_LIMIT characters;
  *  2. every tool message is cut to SYSTEM_TOOL_MESSAGE_CHAR_LIMIT characters;
  *  3. while the combined tool text is above SYSTEM_TOTAL_TOOL_CHARS_LIMIT, tool
  *     messages are cut to the "old" limit, oldest first.
  * Messages that a pass does not shorten keep their original content object.
  * Mutates the messages in place; does nothing when there is no messages array.
  */
 function compactLargeToolMessages(payload: any) {
  if (!payload || !Array.isArray(payload.messages)) return;

  const toolMessages = payload.messages.filter((message: any) => message?.role === "tool");

  // Pass 1: aggressively shrink everything but the four most recent tool outputs.
  const olderToolMessages = toolMessages.slice(0, Math.max(0, toolMessages.length - 4));
  for (const message of olderToolMessages) {
    const text = getMessageText(message);
    if (text.length > SYSTEM_OLD_TOOL_MESSAGE_CHAR_LIMIT) {
      setMessageText(message, truncateText(text, SYSTEM_OLD_TOOL_MESSAGE_CHAR_LIMIT, "older tool output"));
    }
  }

  // Pass 2: cap every individual tool output.
  for (const message of toolMessages) {
    const text = getMessageText(message);
    if (text.length > SYSTEM_TOOL_MESSAGE_CHAR_LIMIT) {
      setMessageText(message, truncateText(text, SYSTEM_TOOL_MESSAGE_CHAR_LIMIT, "tool output"));
    }
  }

  // Pass 3: enforce the combined budget, giving up the oldest outputs first.
  let totalToolChars = toolMessages.reduce((sum: number, message: any) => sum + getMessageText(message).length, 0);
  for (const message of toolMessages) {
    if (totalToolChars <= SYSTEM_TOTAL_TOOL_CHARS_LIMIT) break;
    const text = getMessageText(message);

    const nextText = truncateText(text, SYSTEM_OLD_TOOL_MESSAGE_CHAR_LIMIT, "older tool output");
    // Already within the limit: rewriting would only flatten its content blocks
    // without saving any characters.
    if (nextText === text) continue;
    setMessageText(message, nextText);
    totalToolChars -= text.length - nextText.length;
  }
 }

 /**
  * Returns the text of a message: string content as-is; for array content, the
  * `text` of each block ("" for blocks without a string `text`) joined with
  * newlines; "" for any other content.
  */
 function getMessageText(message: any): string {
  const content = message?.content;
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces: string[] = [];
  for (const block of content) {
    pieces.push(typeof block?.text === "string" ? block.text : "");
  }
  return pieces.join("\n");
 }

 /**
  * Stores `text` as the message content, keeping the existing representation:
  * a plain string stays a plain string, anything else becomes a single text block.
  */
 function setMessageText(message: any, text: string) {
  const keepsStringForm = typeof message?.content === "string";
  message.content = keepsStringForm ? text : [{ type: "text", text }];
 }

 /**
  * Shortens `text` to at most `maxChars` UTF-16 code units, ending with a notice
  * that names what was truncated. Text already within the limit is returned as-is.
  *
  * The cut never lands between the two halves of a surrogate pair, so the result
  * does not gain a lone surrogate. Trailing whitespace before the notice is
  * removed. If `maxChars` is smaller than the notice itself, the result is just
  * the notice and is therefore longer than `maxChars`.
  *
  * @param text     Text to shorten.
  * @param maxChars Target maximum length in UTF-16 code units.
  * @param label    Short description inserted into the notice.
  * @returns The original or shortened text.
  */
 function truncateText(text: string, maxChars: number, label: string): string {
  if (text.length <= maxChars) return text;
  const notice = `\n\n[AFM system context guard: truncated ${label}. Use read with offset/limit for the omitted lines.]`;

  let keep = Math.max(0, maxChars - notice.length);
  if (keep > 0) {
    const last = text.charCodeAt(keep - 1);
    const next = text.charCodeAt(keep);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    // Drop the dangling high surrogate rather than emit half a character.
    if (splitsPair) keep -= 1;
  }
  return text.slice(0, keep).trimEnd() + notice;
 }

 /**
  * Estimates the token count of a request payload as one token per three
  * characters of its JSON serialization, rounded up.
  *
  * @param payload Request payload; values that JSON cannot represent at the top
  *                level (e.g. `undefined`) count as empty.
  * @returns Estimated token count, 0 for an unserializable payload.
  */
 function estimatePayloadTokens(payload: any): number {
  // JSON/tool-call payloads tokenize denser than prose, so chars/3 is a safer
  // compaction signal than Pi's default chars/4 approximation for AFM system.
  // JSON.stringify yields undefined for undefined/function/symbol input.
  const serialized: string | undefined = JSON.stringify(payload);
  return Math.ceil((serialized ?? "").length / 3);
 }

 /**
  * Estimates the output tokens of an assistant message as one token per three
  * characters, rounded up.
  *
  * Counted characters: plain-string content; for array content, each block's
  * `text`, otherwise its `thinking`, otherwise the JSON form of the block when
  * its `type` is "toolCall". Any other content counts as 0.
  *
  * @param message Assistant message whose `content` is inspected.
  * @returns Estimated output token count.
  */
 function estimateAssistantOutputTokens(message: any): number {
  const content = message?.content;
  // Plain-string content is accepted the same way getMessageText accepts it.
  if (typeof content === "string") return Math.ceil(content.length / 3);
  if (!Array.isArray(content)) return 0;

  let chars = 0;
  for (const block of content) {
    if (typeof block?.text === "string") chars += block.text.length;
    else if (typeof block?.thinking === "string") chars += block.thinking.length;
    else if (block?.type === "toolCall") chars += JSON.stringify(block).length;
  }
  return Math.ceil(chars / 3);
 }

 /**
  * Rewrites every function tool named `edit` so that it takes a single
  * `path` / `oldText` / `newText` replacement. Other tools are left alone.
  * Mutates `payload.tools` in place; does nothing when there is no tools array.
  */
 function rewriteEditToolSchema(payload: any) {
  const tools = payload?.tools;
  if (!Array.isArray(tools)) return;

  const stringParam = (description: string) => ({ type: "string", description });

  tools.forEach((tool: any) => {
    const fn = tool?.function;
    const isEditTool = tool?.type === "function" && fn?.name === "edit";
    if (!isEditTool) return;

    fn.description =
      "Edit a single file by replacing one exact text block with replacement text. For multiple separate edits, call this tool multiple times.";
    fn.parameters = {
      type: "object",
      required: ["path", "oldText", "newText"],
      properties: {
        path: stringParam("Path to the file to edit, relative to the current working directory or absolute."),
        oldText: stringParam("Exact text to replace. It must match one unique block in the original file."),
        newText: stringParam("Replacement text for oldText."),
      },
    };
  });
 }

 /**
  * Adjusts the edit-tool guidance inside string-valued system/developer messages
  * so it describes one replacement per call instead of an `edits[]` list.
  * The three guidance sentences are replaced at their first occurrence only;
  * every "edits[].oldText" becomes "oldText". Mutates `payload.messages` in place;
  * does nothing when there is no messages array.
  */
 function rewriteEditPromptGuidance(payload: any) {
  const messages = payload?.messages;
  if (!Array.isArray(messages)) return;

  const toolListOld =
    "- edit: Make precise file edits with exact text replacement, including multiple disjoint edits in one call";
  const toolListNew = "- edit: Make a precise single-block file edit with exact text replacement";

  const multiEditOld =
    "When changing multiple separate locations in one file, use one edit call with multiple entries in edits[] instead of multiple edit calls";
  const multiEditNew =
    "When changing multiple separate locations in one file, use multiple edit calls, one exact replacement per call";

  const matchingOld =
    "Each oldText is matched against the original file, not after earlier edits are applied. Do not emit overlapping or nested edits. Merge nearby changes into one edit.";
  const matchingNew =
    "oldText is matched exactly against the file. Do not include overlapping, nested, or large unchanged regions.";

  for (const entry of messages) {
    const isPromptRole = entry?.role === "system" || entry?.role === "developer";
    if (!isPromptRole || typeof entry.content !== "string") continue;

    // Same order as the guidance appears in the rewrite chain: tool list, field
    // name, multi-location advice, matching rules.
    let rewritten = entry.content;
    rewritten = rewritten.replace(toolListOld, toolListNew);
    rewritten = rewritten.replaceAll("edits[].oldText", "oldText");
    rewritten = rewritten.replace(multiEditOld, multiEditNew);
    rewritten = rewritten.replace(matchingOld, matchingNew);
    entry.content = rewritten;
  }
 }
diff --git a/models.json b/models.json
 {
  "providers": {
    "fm": {
      "name": "Apple Foundation Models",
      "baseUrl": "http://127.0.0.1:1976/v1",
      "api": "openai-completions",
      "apiKey": "fm-local",
      "compat": {
        "supportsStore": false,
        "supportsDeveloperRole": false,
        "supportsReasoningEffort": false,
        "maxTokensField": "max_tokens",
        "supportsStrictMode": false
      },
      "models": [
        {
          "id": "system",
          "name": "Apple Foundation Model (on-device)",
          "reasoning": false,
          "input": [
            "text"
          ],
          "contextWindow": 4096,
          "maxTokens": 512,
          "cost": {
            "input": 0,
            "output": 0,
            "cacheRead": 0,
            "cacheWrite": 0
          }
        },
        {
          "id": "pcc",
          "name": "Apple Foundation Model (Private Cloud Compute)",
          "reasoning": false,
          "input": [
            "text"
          ],
          "contextWindow": 32768,
          "maxTokens": 1024,
          "cost": {
            "input": 0,
            "output": 0,
            "cacheRead": 0,
            "cacheWrite": 0
          }
        }
      ]
    }
  }
 }
diff --git a/settings.json b/settings.json
 {
  "defaultProvider": "fm",
  "defaultModel": "pcc",
  "compaction": {
    "enabled": true,
    "reserveTokens": 1024,
    "keepRecentTokens": 2048
  }
 }
diff --git a/x-post.txt b/x-post.txt
 Pi Coding Agent running on Apple's Foundation Models via `fm serve`.

 Local OpenAI-compatible endpoint:
 http://127.0.0.1:1976/v1/chat/completions

 Configured both:
 - `fm/system` on-device, 4K context
 - `fm/pcc` Private Cloud Compute, 32K context

 Gist:
 https://gist.github.com/Anemll/be5021f5376cf93eb0bd64aad2a8e619
	export default function (pi: any) {
	let lastPayloadTokens = 0;

	pi.on("before_provider_request", (event: any, ctx: any) => {
	if (ctx.model?.provider !== "fm") return;

	const payload = structuredClone(event.payload);
	rewriteEditToolSchema(payload);
	rewriteEditPromptGuidance(payload);
	if (ctx.model?.id === "system") {
	rewriteCompactSystemPrompt(payload);
	rewriteCompactToolSchemas(payload);
	compactLargeToolMessages(payload);
	}
	lastPayloadTokens = estimatePayloadTokens(payload);
	return payload;
	});

	pi.on("message_end", (event: any, ctx: any) => {
	if (ctx.model?.provider !== "fm") return;
	if (event.message?.role !== "assistant") return;

	const message = { ...event.message };
	let changed = false;

	if (
	message.stopReason === "error" &&
	typeof message.errorMessage === "string" &&
	/transcript exceeded the model'?s context size/i.test(message.errorMessage)
	) {
	message.errorMessage = `Input exceeds the context window: ${message.errorMessage}`;
	changed = true;
	}

	if (lastPayloadTokens > 0 && (!message.usage || message.usage.totalTokens === 0)) {
	const outputTokens = estimateAssistantOutputTokens(message);
	message.usage = {
	input: lastPayloadTokens,
	output: outputTokens,
	cacheRead: 0,
	cacheWrite: 0,
	totalTokens: lastPayloadTokens + outputTokens,
	cost: {
	input: 0,
	output: 0,
	cacheRead: 0,
	cacheWrite: 0,
	total: 0,
	},
	};
	changed = true;
	}

	if (changed) return { message };
	});

	pi.on("tool_call", (event: any, ctx: any) => {
	if (ctx.model?.provider !== "fm" || ctx.model?.id !== "system") return;
	if (event.toolName !== "read") return;

	const limit = typeof event.input?.limit === "number" ? event.input.limit : undefined;
	if (limit === undefined || limit > SYSTEM_READ_LINE_LIMIT) {
	event.input.limit = SYSTEM_READ_LINE_LIMIT;
	}
	});
	}

	const SYSTEM_READ_LINE_LIMIT = 80;
	const SYSTEM_TOOL_MESSAGE_CHAR_LIMIT = 1800;
	const SYSTEM_TOTAL_TOOL_CHARS_LIMIT = 2400;
	const SYSTEM_OLD_TOOL_MESSAGE_CHAR_LIMIT = 180;

	function rewriteCompactSystemPrompt(payload: any) {
	if (!payload || !Array.isArray(payload.messages)) return;

	for (const message of payload.messages) {
	if (message?.role !== "system" && message?.role !== "developer") continue;
	if (typeof message.content !== "string") continue;

	const date = message.content.match(/Current date: .*/)?.[0];
	const cwd = message.content.match(/Current working directory: .*/)?.[0];
	message.content = [
	"You are a concise coding assistant running inside pi.",
	"Use tools to inspect files, run commands, and make targeted edits.",
	"This on-device Apple Foundation Model has a small 4K context window.",
	"Keep context small: prefer rg, sed -n, python3 -m py_compile, and read offset/limit over full-file reads.",
	"Do not read the same large file repeatedly. Read only the relevant line ranges.",
	"Use edit for one exact replacement; call edit multiple times for separate replacements.",
	"On macOS, use python3 rather than python unless the user asks otherwise.",
	date,
	cwd,
	]
	.filter(Boolean)
	.join("\n");
	}
	}

	function rewriteCompactToolSchemas(payload: any) {
	if (!payload || !Array.isArray(payload.tools)) return;

	for (const tool of payload.tools) {
	const fn = tool?.function;
	if (tool?.type !== "function" || !fn?.name) continue;

	if (fn.name === "read") {
	fn.description = "Read a file. Use offset and limit for small chunks.";
	fn.parameters = {
	type: "object",
	required: ["path"],
	properties: {
	path: { type: "string", description: "File path." },
	offset: { type: "number", description: "Start line, 1-indexed." },
	limit: { type: "number", description: "Max lines." },
	},
	};
	} else if (fn.name === "bash") {
	fn.description = "Run a shell command.";
	fn.parameters = {
	type: "object",
	required: ["command"],
	properties: {
	command: { type: "string", description: "Command to run." },
	timeout: { type: "number", description: "Timeout in seconds." },
	},
	};
	} else if (fn.name === "write") {
	fn.description = "Create or overwrite a file.";
	fn.parameters = {
	type: "object",
	required: ["path", "content"],
	properties: {
	path: { type: "string", description: "File path." },
	content: { type: "string", description: "File content." },
	},
	};
	} else if (fn.name === "edit") {
	fn.description = "Replace one exact text block in one file.";
	fn.parameters = {
	type: "object",
	required: ["path", "oldText", "newText"],
	properties: {
	path: { type: "string", description: "File path." },
	oldText: { type: "string", description: "Exact text to replace." },
	newText: { type: "string", description: "Replacement text." },
	},
	};
	}
	}
	}

	function compactLargeToolMessages(payload: any) {
	if (!payload || !Array.isArray(payload.messages)) return;

	const toolMessages = payload.messages.filter((message: any) => message?.role === "tool");
	const olderToolMessages = toolMessages.slice(0, Math.max(0, toolMessages.length - 4));
	for (const message of olderToolMessages) {
	const text = getMessageText(message);
	if (text.length > SYSTEM_OLD_TOOL_MESSAGE_CHAR_LIMIT) {
	setMessageText(message, truncateText(text, SYSTEM_OLD_TOOL_MESSAGE_CHAR_LIMIT, "older tool output"));
	}
	}

	for (const message of toolMessages) {
	const text = getMessageText(message);
	if (text.length > SYSTEM_TOOL_MESSAGE_CHAR_LIMIT) {
	setMessageText(message, truncateText(text, SYSTEM_TOOL_MESSAGE_CHAR_LIMIT, "tool output"));
	}
	}

	let totalToolChars = toolMessages.reduce((sum: number, message: any) => sum + getMessageText(message).length, 0);
	for (const message of toolMessages) {
	if (totalToolChars <= SYSTEM_TOTAL_TOOL_CHARS_LIMIT) break;
	const text = getMessageText(message);

	const nextText = truncateText(text, SYSTEM_OLD_TOOL_MESSAGE_CHAR_LIMIT, "older tool output");
	setMessageText(message, nextText);
	totalToolChars -= text.length - nextText.length;
	}
	}

	function getMessageText(message: any): string {
	if (typeof message?.content === "string") return message.content;
	if (!Array.isArray(message?.content)) return "";
	return message.content.map((block: any) => (typeof block?.text === "string" ? block.text : "")).join("\n");
	}

	function setMessageText(message: any, text: string) {
	if (typeof message?.content === "string") {
	message.content = text;
	} else {
	message.content = [{ type: "text", text }];
	}
	}

	function truncateText(text: string, maxChars: number, label: string): string {
	if (text.length <= maxChars) return text;
	const notice = `\n\n[AFM system context guard: truncated ${label}. Use read with offset/limit for the omitted lines.]`;
	return text.slice(0, Math.max(0, maxChars - notice.length)).trimEnd() + notice;
	}

	function estimatePayloadTokens(payload: any): number {
	// JSON/tool-call payloads tokenize denser than prose, so chars/3 is a safer
	// compaction signal than Pi's default chars/4 approximation for AFM system.
	return Math.ceil(JSON.stringify(payload).length / 3);
	}

	function estimateAssistantOutputTokens(message: any): number {
	if (!Array.isArray(message?.content)) return 0;

	let chars = 0;
	for (const block of message.content) {
	if (typeof block?.text === "string") chars += block.text.length;
	else if (typeof block?.thinking === "string") chars += block.thinking.length;
	else if (block?.type === "toolCall") chars += JSON.stringify(block).length;
	}
	return Math.ceil(chars / 3);
	}

	function rewriteEditToolSchema(payload: any) {
	if (!payload || !Array.isArray(payload.tools)) return;

	for (const tool of payload.tools) {
	const fn = tool?.function;
	if (tool?.type !== "function" || fn?.name !== "edit") continue;

	fn.description =
	"Edit a single file by replacing one exact text block with replacement text. For multiple separate edits, call this tool multiple times.";
	fn.parameters = {
	type: "object",
	required: ["path", "oldText", "newText"],
	properties: {
	path: {
	type: "string",
	description: "Path to the file to edit, relative to the current working directory or absolute.",
	},
	oldText: {
	type: "string",
	description: "Exact text to replace. It must match one unique block in the original file.",
	},
	newText: {
	type: "string",
	description: "Replacement text for oldText.",
	},
	},
	};
	}
	}

	function rewriteEditPromptGuidance(payload: any) {
	if (!payload || !Array.isArray(payload.messages)) return;

	for (const message of payload.messages) {
	if (message?.role !== "system" && message?.role !== "developer") continue;
	if (typeof message.content !== "string") continue;

	message.content = message.content
	.replace(
	"- edit: Make precise file edits with exact text replacement, including multiple disjoint edits in one call",
	"- edit: Make a precise single-block file edit with exact text replacement",
	)
	.replaceAll("edits[].oldText", "oldText")
	.replace(
	"When changing multiple separate locations in one file, use one edit call with multiple entries in edits[] instead of multiple edit calls",
	"When changing multiple separate locations in one file, use multiple edit calls, one exact replacement per call",
	)
	.replace(
	"Each oldText is matched against the original file, not after earlier edits are applied. Do not emit overlapping or nested edits. Merge nearby changes into one edit.",
	"oldText is matched exactly against the file. Do not include overlapping, nested, or large unchanged regions.",
	);
	}
	}
	{
	"providers": {
	"fm": {
	"name": "Apple Foundation Models",
	"baseUrl": "http://127.0.0.1:1976/v1",
	"api": "openai-completions",
	"apiKey": "fm-local",
	"compat": {
	"supportsStore": false,
	"supportsDeveloperRole": false,
	"supportsReasoningEffort": false,
	"maxTokensField": "max_tokens",
	"supportsStrictMode": false
	},
	"models": [
	{
	"id": "system",
	"name": "Apple Foundation Model (on-device)",
	"reasoning": false,
	"input": [
	"text"
	],
	"contextWindow": 4096,
	"maxTokens": 512,
	"cost": {
	"input": 0,
	"output": 0,
	"cacheRead": 0,
	"cacheWrite": 0
	}
	},
	{
	"id": "pcc",
	"name": "Apple Foundation Model (Private Cloud Compute)",
	"reasoning": false,
	"input": [
	"text"
	],
	"contextWindow": 32768,
	"maxTokens": 1024,
	"cost": {
	"input": 0,
	"output": 0,
	"cacheRead": 0,
	"cacheWrite": 0
	}
	}
	]
	}
	}
	}
	{
	"defaultProvider": "fm",
	"defaultModel": "pcc",
	"compaction": {
	"enabled": true,
	"reserveTokens": 1024,
	"keepRecentTokens": 2048
	}
	}
	Pi Coding Agent running on Apple's Foundation Models via `fm serve`.

	Local OpenAI-compatible endpoint:
	http://127.0.0.1:1976/v1/chat/completions

	Configured both:
	- `fm/system` on-device, 4K context
	- `fm/pcc` Private Cloud Compute, 32K context

	Gist:
	https://gist.github.com/Anemll/be5021f5376cf93eb0bd64aad2a8e619