Skip to content

Instantly share code, notes, and snippets.

@masylum
Created May 11, 2026 17:06
Show Gist options
  • Select an option

  • Save masylum/3ffdcacc57e6b386824ea1249feaa941 to your computer and use it in GitHub Desktop.

Select an option

Save masylum/3ffdcacc57e6b386824ea1249feaa941 to your computer and use it in GitHub Desktop.
import {
DynamicWorkerExecutor,
type Executor,
type ResolvedProvider,
type ToolProvider,
} from "@cloudflare/codemode"
import { aiTools, createCodeTool } from "@cloudflare/codemode/ai"
import {
createWorkspaceStateBackend,
STATE_TYPES,
type Workspace,
} from "@cloudflare/shell"
import { type CdpSession, connectBrowser } from "agents/browser"
import type { ToolSet } from "ai"
import dedent from "dedent"
import type { WorkerAgent } from "../agent"
import { base64ToBytes, bytesToBase64 } from "../base64"
/**
 * Fallback timeout, in milliseconds, handed to the cdp provider (and from
 * there to `connectBrowser`) when the caller does not set `cdpTimeoutMs`.
 */
const defaultCdpTimeoutMs = 60_000
/** Options accepted by `createExecuteTool`. */
export interface CreateExecuteToolOptions {
/** Agent supplying the `LOADER` / `BROWSER` bindings and the workspace. */
agent: WorkerAgent
/** Tools wrapped with `aiTools(...)` and made callable from the generated program. */
codemodeTools: ToolSet
/** Optional override for `defaultCdpTimeoutMs`. */
cdpTimeoutMs?: number
}
/**
 * Build the code-execution tool.
 *
 * The returned tool runs a generated program through a `DynamicWorkerExecutor`
 * and exposes three providers to it: the wrapped `codemodeTools`, a
 * binary-safe `state` provider over the agent workspace, and a `cdp` provider
 * that opens a browser session lazily.
 *
 * A browser session never outlives one `execute` call: any session left over
 * from a previous run is closed before the program starts, and whatever the
 * program opened is closed again when it finishes (successfully or not).
 *
 * @param options - Agent, tool set and optional CDP timeout; `cdpTimeoutMs`
 *   falls back to `defaultCdpTimeoutMs`.
 * @returns The tool produced by `createCodeTool`.
 */
export function createExecuteTool(options: CreateExecuteToolOptions) {
  const { agent, codemodeTools, cdpTimeoutMs } = options
  const cdpTimeout = cdpTimeoutMs ?? defaultCdpTimeoutMs
  const executor = new DynamicWorkerExecutor({
    loader: agent.bindings.LOADER,
    // NOTE(review): executor timeout is fixed and independent of cdpTimeoutMs.
    timeout: 60_000,
    // NOTE(review): presumably disables outbound network from the sandbox — confirm.
    globalOutbound: null,
  })
  const stateProvider = createBinarySafeStateProvider(agent.workspace)
  // Mutable holder shared with the cdp provider. Despite its name, `sessionId`
  // holds the live CdpSession object; `pageSessionId` is the default page's id.
  const stateRef = {
    sessionId: null as CdpSession | null,
    pageSessionId: null as string | null,
  }
  const cdpProvider = createCdpProvider(agent, cdpTimeout, stateRef)

  function reportCloseFailure(error: unknown) {
    console.warn("createExecuteTool: failed to close browser session", error)
  }

  function closeBrowserSession() {
    const session = stateRef.sessionId
    // Forget the session BEFORE closing it: if close() fails we must not keep
    // handing a dead session to the next program.
    stateRef.sessionId = null
    stateRef.pageSessionId = null
    if (!session) return
    try {
      // close() may be sync or async; either way a failure is only logged so
      // that cleanup in `finally` can never replace the program's own
      // result/error and never becomes an unhandled rejection.
      void Promise.resolve(session.close()).catch(reportCloseFailure)
    } catch (error) {
      reportCloseFailure(error)
    }
  }

  // we use this to be able to inject the cdp session :)
  const proxyExecutor: Executor = {
    execute: async (code: string, providers: ResolvedProvider[]) => {
      closeBrowserSession()
      try {
        return await executor.execute(code, providers)
      } finally {
        closeBrowserSession()
      }
    },
  }

  return createCodeTool({
    tools: [aiTools(codemodeTools), stateProvider, cdpProvider],
    executor: proxyExecutor,
    description: buildDescription(),
  })
}
/**
 * CDP domain names (the part of a method before the first ".") that
 * `cdp.send` treats as page-scoped: when a command in one of these domains is
 * sent without an explicit `sessionId`, the default page session is used,
 * creating it first if necessary.
 */
const PAGE_SCOPED_DOMAINS = new Set([
"Page",
"Runtime",
"DOM",
"Input",
"Emulation",
"Network",
"CSS",
"Overlay",
"Log",
"Console",
"Performance",
"Accessibility",
"Animation",
"Audits",
"DOMDebugger",
"DOMSnapshot",
"DOMStorage",
"Fetch",
"IndexedDB",
"LayerTree",
"Media",
"ServiceWorker",
"Storage",
"WebAudio",
"WebAuthn",
])
/**
 * Build the `cdp` tool provider: a thin, lazily-connected wrapper around a
 * browser `CdpSession`.
 *
 * The browser connection and the default page target are both created on
 * demand and cached in `ref`, which the caller owns and may reset (e.g. after
 * closing the session). Creation is shared between concurrent callers, so a
 * program that fires several `cdp.*` calls at once (e.g. via `Promise.all`)
 * still gets exactly one browser session and one default page.
 *
 * @param agent - Agent whose `BROWSER` binding is used to connect.
 * @param timeoutMs - Timeout forwarded to `connectBrowser`.
 * @param ref - Shared mutable holder. `sessionId` holds the live `CdpSession`
 *   object (not an id); `pageSessionId` is the default page's session id.
 * @returns A positional-args tool provider named `cdp`.
 */
function createCdpProvider(
  agent: WorkerAgent,
  timeoutMs: number,
  ref: { sessionId: CdpSession | null; pageSessionId: string | null },
): ToolProvider {
  const binding = agent.bindings.BROWSER

  // Wrap an async factory so overlapping calls share one in-flight attempt.
  // The slot is released once the attempt settles, so a failure can be retried.
  function singleFlight<T>(run: () => Promise<T>): () => Promise<T> {
    let inFlight: Promise<T> | null = null
    return () => {
      if (inFlight) return inFlight
      const attempt = run()
      inFlight = attempt
      const release = () => {
        if (inFlight === attempt) inFlight = null
      }
      attempt.then(release, release)
      return attempt
    }
  }

  const connect = singleFlight(async () => {
    const session = await connectBrowser(binding, timeoutMs)
    ref.sessionId = session
    return session
  })

  async function ensureSession(): Promise<CdpSession> {
    if (ref.sessionId) return ref.sessionId
    return connect()
  }

  const openDefaultPage = singleFlight(async () => {
    const session = await ensureSession()
    const { targetId } = (await session.send("Target.createTarget", {
      url: "about:blank",
    })) as { targetId: string }
    const pageSessionId = await session.attachToTarget(targetId)
    await session.send("Page.enable", {}, { sessionId: pageSessionId })
    await session.send("Runtime.enable", {}, { sessionId: pageSessionId })
    // Publish only once the page is fully set up, so nobody picks up a
    // half-initialised default page.
    ref.pageSessionId = pageSessionId
    return pageSessionId
  })

  async function ensurePage(): Promise<string> {
    if (ref.pageSessionId) return ref.pageSessionId
    return openDefaultPage()
  }

  return {
    name: "cdp",
    positionalArgs: true,
    types: CDP_TYPES,
    tools: {
      send: {
        description: dedent`
Send a CDP command. A default page target is auto-created on the first
page-scoped command — you don't need Target.createTarget or sessionId
for single-page flows. Just call Page.navigate, Runtime.evaluate, etc.
For multi-tab flows, use Target.createTarget + cdp.attachToTarget()
to switch the default page target.
`,
        execute: async (
          method: string,
          params: unknown,
          opts?: { timeoutMs?: number; sessionId?: string },
        ) => {
          // Arguments come from generated code, so the declared type is not a guarantee.
          if (typeof method !== "string" || method.length === 0) {
            throw new Error(
              'cdp.send(method, params?, options?) requires a CDP method name string such as "Page.navigate".',
            )
          }
          const session = await ensureSession()
          const [domain = ""] = method.split(".")
          // Copy so the caller's options object is never mutated.
          const sendOptions = { ...opts }
          if (PAGE_SCOPED_DOMAINS.has(domain) && !sendOptions.sessionId) {
            sendOptions.sessionId = await ensurePage()
          }
          return session.send(method, params, sendOptions)
        },
      },
      attachToTarget: {
        description: dedent`
Attach to a target and set it as the default page.
Subsequent page-scoped cdp.send() calls will use this sessionId
automatically. Returns the sessionId.
`,
        execute: async (targetId: unknown, opts: unknown) => {
          const session = await ensureSession()
          const sid = await session.attachToTarget(
            targetId as string,
            opts as { timeoutMs?: number },
          )
          ref.pageSessionId = sid
          return sid
        },
      },
      getDebugLog: {
        execute: async (limit: unknown) =>
          (await ensureSession()).getDebugLog(limit as number | undefined),
      },
      clearDebugLog: {
        execute: async () => {
          ;(await ensureSession()).clearDebugLog()
        },
      },
    } as unknown as ToolProvider["tools"],
  }
}
/** Signature shared by every `state` tool: positional unknown args, async result. */
type StateFn = (...args: unknown[]) => Promise<unknown>
/** One entry of the provider's tool table. */
type SimpleToolEntry = { description?: string; execute: StateFn }

/**
 * Expose a workspace state backend as the `state` tool provider, swapping the
 * three byte-oriented methods for string-only equivalents.
 *
 * Why: the codemode RPC bridge JSON.stringify's positional args, which
 * corrupts Uint8Array values. So `writeFileBytes` takes base64,
 * `readFileBytes` returns base64, and `appendFile` accepts strings only.
 * Every other function found on the backend's prototype is passed through
 * unchanged, bound to the backend.
 *
 * TODO: remove once upstream fixes the RPC bridge
 * https://github.com/cloudflare/agents/issues/1494
 *
 * @param workspace - Workspace the backend is created over; also used
 *   directly for `writeFileBytes` (the call that forwards `mimeType`).
 * @returns A positional-args tool provider named `state`.
 */
function createBinarySafeStateProvider(workspace: Workspace): ToolProvider {
  const backend = createWorkspaceStateBackend(workspace)
  const proto = Object.getPrototypeOf(backend) as Record<string, unknown>
  const isSkipped = (name: string) =>
    name === "constructor" ||
    name === "getCapabilities" ||
    patchedBinaryMethods.has(name)

  // Pass-through tools: every remaining prototype function, bound to the backend.
  const tools: Record<string, SimpleToolEntry> = {}
  Object.getOwnPropertyNames(proto)
    .filter((name) => !isSkipped(name))
    .forEach((name) => {
      const member = proto[name]
      if (typeof member === "function") {
        tools[name] = {
          execute: (member as (...a: unknown[]) => unknown).bind(
            backend,
          ) as StateFn,
        }
      }
    })

  // String-only replacements for the methods that cannot cross the bridge.
  const replacements: Record<string, SimpleToolEntry> = {
    writeFileBytes: {
      description: dedent`
Write a base64-encoded payload (e.g. a CDP screenshot) to a file.
`,
      execute: async (...args: unknown[]) => {
        const path = args[0] as string
        const base64 = args[1] as string
        const mimeType = args[2] as string | undefined
        if (typeof base64 === "string") {
          await workspace.writeFileBytes(path, base64ToBytes(base64), mimeType)
          return
        }
        throw new Error(
          "state.writeFileBytes(path, base64, mimeType?) requires a base64 string — pass cdp screenshot data straight through (shot.data) or btoa(...) any other binary payload.",
        )
      },
    },
    readFileBytes: {
      description: dedent`
Read a file as a base64 string.
`,
      execute: async (...args: unknown[]) => {
        const path = args[0] as string
        const bytes = await backend.readFileBytes(path)
        return bytesToBase64(bytes)
      },
    },
    appendFile: {
      execute: async (...args: unknown[]) => {
        const path = args[0] as string
        const content = args[1] as string
        if (typeof content === "string") {
          await backend.appendFile(path, content)
          return
        }
        throw new Error(
          "state.appendFile(path, content) only accepts strings; for binary writes use state.writeFileBytes with a base64 payload.",
        )
      },
    },
  }
  Object.assign(tools, replacements)

  return {
    name: "state",
    positionalArgs: true,
    types: binarySafeStateTypes,
    tools,
  }
}
/**
 * Backend method names that `createBinarySafeStateProvider` does NOT pass
 * through from the backend prototype, because it installs its own
 * string/base64-based replacement for each of them.
 */
const patchedBinaryMethods = new Set([
"writeFileBytes",
"readFileBytes",
"appendFile",
])
/**
 * Type declarations advertised for the `state` provider: the upstream
 * `STATE_TYPES` text with the three patched methods' signatures and summary
 * lines rewritten to their string/base64 forms.
 *
 * Each pair is [upstream text, replacement] and is applied in order. String
 * patterns replace only the first occurrence, and a pattern that is not found
 * leaves the text untouched.
 */
const binarySafeStateTypes = (
  [
    [
      "readFileBytes(path: string): Promise<Uint8Array>",
      "readFileBytes(path: string): Promise<string>",
    ],
    [
      "writeFileBytes(path: string, content: Uint8Array): Promise<void>",
      "writeFileBytes(path: string, base64: string, mimeType?: string): Promise<void>",
    ],
    [
      "appendFile(path: string, content: string | Uint8Array): Promise<void>",
      "appendFile(path: string, content: string): Promise<void>",
    ],
    ["Read a file as bytes.", "Read a file as a base64 string."],
    [
      "Write bytes to a file.",
      "Write a base64-encoded payload (e.g. a CDP screenshot) to a file.",
    ],
    [
      "Append text or bytes to a file.",
      "Append text to a file. (Use writeFileBytes for binary payloads.)",
    ],
  ] as ReadonlyArray<readonly [string, string]>
).reduce<string>(
  (types, [upstream, patched]) => types.replace(upstream, patched),
  STATE_TYPES,
)
/**
 * TypeScript declaration text for the `cdp` global, supplied as the `types`
 * of the cdp tool provider. It mirrors the four tools that provider exposes:
 * `send`, `attachToTarget`, `getDebugLog` and `clearDebugLog`.
 */
const CDP_TYPES = `declare const cdp: {
send(method: string, params?: unknown, options?: {
timeoutMs?: number;
sessionId?: string;
}): Promise<unknown>;
attachToTarget(targetId: string, options?: {
timeoutMs?: number;
}): Promise<string>;
getDebugLog(limit?: number): Promise<unknown[]>;
clearDebugLog(): Promise<void>;
};`
/**
 * Return the description text passed to `createCodeTool` for the execute tool.
 *
 * The text tells the model how to write the program (a plain-JavaScript async
 * arrow function), explains the lazy browser session / default page behaviour
 * of `cdp.*`, states that nothing persists between execute calls, and gives
 * two worked examples.
 *
 * NOTE(review): `{{types}}` looks like a placeholder that `createCodeTool`
 * substitutes with the providers' type declarations — confirm against the
 * codemode documentation.
 */
function buildDescription(): string {
return dedent`
Create a program to safely and effectively achieve a goal.
Available:
{{types}}
Write an async arrow function in JavaScript that returns the result.
Do NOT use TypeScript syntax — no type annotations, interfaces, or generics.
Do NOT define named functions then call them — just write the arrow function body directly.
Use this when you need to chain multiple tool calls together, compose web/file/browser work,
or persist large intermediates without round-tripping bytes through your context window.
When using cdp.*, a browser session is lazily opened on the first cdp call, reused for
the whole program, then closed automatically. A default page target is auto-created on
the first page-scoped command (Page.*, Runtime.*, DOM.*, Input.*, etc.) — you do NOT need
to call Target.createTarget or manage sessionIds for single-page flows.
No state nor session carries between execute calls — bundle navigate / interact / scrape / capture
into one program. If you want state, save it to the workspace with state.writeFile or state.writeFileBytes.
Example — search, fetch, save:
async () => {
const hits = await codemode.searchWeb({ query: "foo bar", limit: 3 });
const out = [];
for (const hit of hits) {
const page = await codemode.getWebMarkdown({ url: hit.url });
await state.writeFile("/web/" + page.title + ".md", page.content);
out.push({ url: page.url, length: page.contentLength });
}
return out;
}
Example — navigate, interact, screenshot:
async () => {
await cdp.send("Page.navigate", { url: "https://example.com" });
await new Promise(r => setTimeout(r, 2000));
await cdp.send("Runtime.evaluate", {
expression: "document.querySelector('.cookie-banner button')?.click()"
});
const shot = await cdp.send("Page.captureScreenshot", { format: "png" });
await state.writeFileBytes("/captures/example.png", shot.data, "image/png");
return { saved: "/captures/example.png" };
}
`
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment