In this guide, you’ll build a Deep Research tool using AI SDK. It orchestrates two agents — a Supervisor and a Researcher — and runs through three concrete phases:
- Clarifying questions (optional but automatic)
- Research orchestration (supervisor) and evidence gathering/compression (researcher)
- Final report writing with citations
- A single AI SDK `tool()` called `deepResearch` that you can attach to any chat/model
- Parallelizable research tasks coordinated by a Supervisor agent
- A Researcher agent that executes tool calls, collects notes, and compresses findings
- Streaming status updates you can show in the UI while research runs
- A final structured report artifact
- Agent roles
- Supervisor: Plans, fans out and coordinates research tasks; decides when the research is “complete”.
- Researcher: Executes search/tool calls and compresses raw findings into high-signal notes.
- Phases
- Clarify: Ask the user a single targeted question if intent is ambiguous.
- Research: Write a brief; loop the Supervisor to call Researcher tasks; compress notes.
- Report: Synthesize a structured report from aggregated notes.
The tool is a thin wrapper that loads config and calls the main workflow. It returns one of three formats: report, clarifying_questions, or problem.
import type { DeepResearchConfig } from './configuration'
import { createDeepResearchConfig } from './configuration'
import { runDeepResearcher } from './deep-researcher'
import { tool, type ModelMessage } from 'ai'
import { z } from 'zod'
import type { StreamWriter } from '../../types'
export const deepResearch = ({
  dataStream,
  messageId,
  messages,
}: {
  dataStream: StreamWriter
  messageId: string
  messages: ModelMessage[]
}) =>
  tool({
    description: 'Conducts deep, autonomous research... (clarify → research → report)',
    inputSchema: z.object({}),
    // Runs the full deep-research workflow and maps its result onto one of the
    // three tool result formats: 'report', 'clarifying_questions', or 'problem'.
    execute: async () => {
      const config: DeepResearchConfig = createDeepResearchConfig()
      try {
        const researchResult = await runDeepResearcher(
          { requestId: messageId, messages },
          config,
          dataStream,
        )
        switch (researchResult.type) {
          case 'report':
            return { ...researchResult.data, format: 'report' as const }
          case 'clarifying_question':
            // Note the singular → plural rename: the workflow emits
            // 'clarifying_question', the tool contract uses 'clarifying_questions'.
            return { answer: researchResult.data, format: 'clarifying_questions' as const }
          default:
            // Previously an unrecognized result type fell through the switch and
            // the tool resolved to `undefined`; surface it as a problem instead.
            return {
              answer: 'Deep research returned an unrecognized result type.',
              format: 'problem' as const,
            }
        }
      } catch (error) {
        return {
          answer: `Deep research failed with error: ${error instanceof Error ? error.message : String(error)}`,
          format: 'problem' as const,
        }
      }
    },
  })
Usage: attach this tool to your model and invoke it as needed. If the tool returns clarifying_questions, call it again after the user answers.
import { generateText, type ModelMessage } from 'ai'
import { openai } from '@ai-sdk/openai'
import { deepResearch } from '@/lib/ai/tools/deep-research/deep-research'
/**
 * Example integration: runs one chat turn with the deepResearch tool attached;
 * the model decides when to invoke it.
 */
export async function startDeepResearch({
  messages,
  messageId,
  dataStream,
}: {
  messages: ModelMessage[]
  messageId: string
  dataStream: { write: (part: unknown) => void } // structural StreamWriter — replaces untyped `any`
}) {
  return generateText({
    model: openai('gpt-4o'),
    messages,
    tools: {
      deepResearch: deepResearch({ dataStream, messageId, messages }),
    },
    // Let the model call the tool; you can also force toolChoice if desired
  })
}
If allow_clarification is enabled in config, the workflow asks a single targeted question using structured output. If clarification is needed, the tool returns early with format: 'clarifying_questions'.
import { generateObject, type ModelMessage } from 'ai'
import { z } from 'zod'
// Structured-output contract for the clarification check.
const ClarifySchema = z.object({
  need_clarification: z.boolean(),
  question: z.string().optional(),
})

/**
 * Phase 1: decide whether the conversation needs exactly one clarifying
 * question before research starts. Returns the model's structured verdict.
 */
export async function clarifyWithUser(messages: ModelMessage[], config: { research_model: any }) {
  const prompt: ModelMessage[] = [
    { role: 'system', content: 'Determine if user intent is ambiguous. Ask exactly one clarifying question only if needed.' },
    { role: 'user', content: JSON.stringify(messages) },
  ]
  const { object } = await generateObject({
    model: config.research_model,
    schema: ClarifySchema,
    messages: prompt,
  })
  return object
}
Client logic: show the question; when the user replies, append their answer to the conversation and invoke the same tool again.
This phase has two parts:
- Write a concise research brief from the conversation
- Supervisor coordinates one or more Researcher runs and aggregates notes
import { generateObject } from 'ai'
import { z } from 'zod'
// Minimal writer interface used to stream progress updates to the client.
export type StreamWriter = { write: (part: any) => void }

// Structured-output schema for the brief: a title plus the brief text.
const BriefSchema = z.object({
  title: z.string(),
  research_brief: z.string(),
})

/**
 * Phase 2a: drafts a short research brief and title from the conversation,
 * streaming 'writing' progress updates before and after generation.
 */
export async function writeResearchBrief(messages: any[], config: { research_model: any }, dataStream: StreamWriter) {
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing research brief', type: 'writing', status: 'running' } })
  const result = await generateObject({
    // The original `(config as any).research_model` cast was redundant —
    // the parameter type already exposes `research_model`.
    model: config.research_model,
    schema: BriefSchema,
    messages: [
      { role: 'system', content: 'Draft a short research brief and a concise title capturing the user’s request.' },
      { role: 'user', content: JSON.stringify(messages) },
    ],
  })
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing research brief', type: 'writing', status: 'completed', message: result.object.research_brief } })
  return result.object
}
// Common state shared by agents: workflow config + progress stream handle.
abstract class AgentBase {
  constructor(protected config: any, protected dataStream: StreamWriter) {}
}

/**
 * Coordinates the research phase. This simplified supervisor delegates a
 * single task (the full brief) to the injected researcher and collects its
 * compressed notes.
 */
export class SupervisorAgent extends AgentBase {
  constructor(config: any, dataStream: StreamWriter, private researcher: { executeResearch: (topic: string) => Promise<{ compressed_research: string; raw_notes: string[]; messages: any[] }> }) {
    super(config, dataStream)
  }

  async run(researchBrief: string): Promise<{ notes: string[] }> {
    this.dataStream.write({ type: 'data-researchUpdate', data: { title: 'Coordinating research tasks', type: 'thoughts', status: 'completed' } })
    const { compressed_research } = await this.researcher.executeResearch(researchBrief)
    return { notes: [compressed_research] }
  }
}
import { generateText, type ModelMessage } from 'ai'
import type { StreamWriter } from './brief-and-supervisor'
/**
 * Executes a single research task: one research pass over the topic followed
 * by a compression pass that distills the transcript into concise notes.
 */
export class ResearcherAgent {
  constructor(private config: { research_model: any; compression_model: any }, private dataStream: StreamWriter) {}

  // Newline-joins message contents, serializing non-string parts as JSON.
  private joinContents(messages: ModelMessage[]): string {
    return messages
      .map(m => (typeof m.content === 'string' ? m.content : JSON.stringify(m.content)))
      .join('\n')
  }

  async executeResearch(topic: string): Promise<{ compressed_research: string; raw_notes: string[]; messages: ModelMessage[] }> {
    this.dataStream.write({ type: 'data-researchUpdate', data: { title: 'Starting research on topic', message: topic, type: 'thoughts', status: 'completed' } })
    const researchPass = await generateText({
      model: this.config.research_model,
      messages: [
        { role: 'system', content: 'You are a meticulous web researcher. Use tools if available, cite sources.' },
        { role: 'user', content: topic },
      ],
    })
    const compressionPass = await generateText({
      model: this.config.compression_model,
      messages: [
        { role: 'system', content: 'Compress the following research into a concise, factual summary with citations preserved. Avoid speculation.' },
        ...researchPass.response.messages,
        { role: 'user', content: 'Provide a tight summary of findings.' },
      ],
    })
    this.dataStream.write({ type: 'data-researchUpdate', data: { title: 'Research summarized', type: 'thoughts', status: 'completed' } })
    return {
      compressed_research: this.joinContents(compressionPass.response.messages),
      raw_notes: [this.joinContents(researchPass.response.messages)],
      messages: researchPass.response.messages,
    }
  }
}
Notes:
- `getAllTools(...)` must return at least one search/extraction tool (search API or MCP). Otherwise the Researcher throws a clear error.
- The Supervisor limits concurrency via `config.max_concurrent_research_units` and enforces an iteration cap via `config.max_researcher_iterations`.
Throughout the workflow, the system writes typed updates to dataStream:
dataStream.write({
type: 'data-researchUpdate',
data: {
title: 'Starting research',
type: 'started' | 'thoughts' | 'writing' | 'completed',
status?: 'running' | 'completed',
message?: string,
timestamp?: number,
},
})You can listen to these events to render a live timeline: brief creation, each research subtask, compression finishes, and final report writing.
The final phase synthesizes all accumulated notes into a single report. It streams a progress update while writing and returns the complete report content.
import { generateText, type ModelMessage } from 'ai'
// Minimal writer interface used to stream progress updates to the client.
type StreamWriter = { write: (part: any) => void }

/**
 * Phase 3: synthesizes brief + findings into the final report artifact,
 * streaming 'writing' progress updates before and after generation.
 */
export async function finalReportGeneration({
  research_brief,
  findings,
  config,
  dataStream,
  title,
}: {
  research_brief: string
  findings: string
  config: { final_report_model: any }
  dataStream: StreamWriter
  title: string
}) {
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing final report', type: 'writing', status: 'running' } })
  const messages: ModelMessage[] = [
    { role: 'system', content: 'Write a well-structured, well-cited research report. Be concise and precise.' },
    { role: 'user', content: `Brief:\n${research_brief}\n\nFindings:\n${findings}` },
  ]
  // The original `(config as any).final_report_model` cast was redundant —
  // the parameter type already exposes `final_report_model`.
  const result = await generateText({ model: config.final_report_model, messages })
  const content = result.response.messages.map(m => (typeof m.content === 'string' ? m.content : JSON.stringify(m.content))).join('\n')
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing final report', type: 'writing', status: 'completed' } })
  // Slugified title doubles as a stable artifact id.
  return { id: title.toLowerCase().replace(/\s+/g, '-'), title, kind: 'text' as const, content }
}
At the top-level, runDeepResearcher stitches the three phases together and returns { type: 'report', data: { id, title, kind: 'text', content } }.
import { deepResearch } from '@/lib/ai/tools/deep-research/deep-research'
import { generateText, type ModelMessage } from 'ai'
import { openai } from '@ai-sdk/openai'
/**
 * Example presenter entry point: runs one chat turn with the deepResearch
 * tool attached and returns the raw generateText result for rendering.
 */
export async function handleUserQuery({
  messages,
  messageId,
  dataStream,
}: {
  messages: ModelMessage[]
  messageId: string
  dataStream: { write: (part: unknown) => void } // structural StreamWriter — replaces untyped `any`
}) {
  const res = await generateText({
    model: openai('gpt-4o'),
    messages,
    tools: { deepResearch: deepResearch({ dataStream, messageId, messages }) },
  })
  // In your presenter: if the latest tool result has format === 'clarifying_questions',
  // show the question and call deepResearch again after the user answers.
  // If format === 'report', render the report artifact (res comes from tool).
  return res
}
import { generateText, generateObject, tool, type ModelMessage } from 'ai'
import { z } from 'zod'
// Minimal writer interface used to stream progress updates to the client.
type StreamWriter = { write: (part: any) => void }
// Model handles and feature flags consumed by the compact workflow below.
type Config = {
research_model: any
compression_model: any
final_report_model: any
allow_clarification: boolean
}
// Structured-output schema for the clarification decision.
const ClarifySchema = z.object({ need_clarification: z.boolean(), question: z.string().optional() })
// Structured-output schema for the research brief (title + brief text).
const BriefSchema = z.object({ title: z.string(), research_brief: z.string() })
// Phase 1: optionally ask one clarifying question. Skipped entirely (and no
// model call made) when the config disables clarification.
async function clarifyWithUser(messages: ModelMessage[], config: Config) {
  if (!config.allow_clarification) return { need_clarification: false as const }
  const prompt: ModelMessage[] = [
    { role: 'system', content: 'Determine if user intent is ambiguous. Ask exactly one clarifying question only if needed.' },
    { role: 'user', content: JSON.stringify(messages) },
  ]
  const { object } = await generateObject({ model: config.research_model, schema: ClarifySchema, messages: prompt })
  return object
}
// Phase 2a: writes a concise research brief + title, streaming progress
// updates before and after generation.
async function writeResearchBrief(messages: ModelMessage[], config: Config, dataStream: StreamWriter) {
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing research brief', type: 'writing', status: 'running' } })
  const prompt: ModelMessage[] = [
    { role: 'system', content: 'Draft a short research brief and a concise title capturing the user’s request.' },
    { role: 'user', content: JSON.stringify(messages) },
  ]
  const { object: brief } = await generateObject({ model: config.research_model, schema: BriefSchema, messages: prompt })
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing research brief', type: 'writing', status: 'completed', message: brief.research_brief } })
  return brief
}
// Researcher phase: one focused pass over the topic, then a compression pass
// that distills the raw output into high-signal notes.
async function executeResearch(topic: string, config: Config, dataStream: StreamWriter) {
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Starting research on topic', message: topic, type: 'thoughts', status: 'completed' } })
  // Newline-joins message contents, serializing non-string parts as JSON.
  const flatten = (msgs: ModelMessage[]) =>
    msgs.map(m => (typeof m.content === 'string' ? m.content : JSON.stringify(m.content))).join('\n')
  const researchPass = await generateText({
    model: config.research_model,
    messages: [
      { role: 'system', content: 'You are a meticulous web researcher. Use tools if available, cite sources.' },
      { role: 'user', content: topic },
    ],
  })
  const compressionPass = await generateText({
    model: config.compression_model,
    messages: [
      { role: 'system', content: 'Compress the following research into a concise, factual summary with citations preserved. Avoid speculation.' },
      ...researchPass.response.messages,
      { role: 'user', content: 'Provide a tight summary of findings.' },
    ],
  })
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Research summarized', type: 'thoughts', status: 'completed' } })
  return { compressed_research: flatten(compressionPass.response.messages) }
}
// Report phase: synthesize brief + findings into the final report artifact.
async function finalReport({ research_brief, findings, config, dataStream, title }: { research_brief: string; findings: string; config: Config; dataStream: StreamWriter; title: string }) {
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing final report', type: 'writing', status: 'running' } })
  const result = await generateText({
    model: config.final_report_model,
    messages: [
      { role: 'system', content: 'Write a well-structured, well-cited research report. Be concise and precise.' },
      { role: 'user', content: `Brief:\n${research_brief}\n\nFindings:\n${findings}` },
    ],
  })
  const pieces: string[] = []
  for (const m of result.response.messages) {
    pieces.push(typeof m.content === 'string' ? m.content : JSON.stringify(m.content))
  }
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing final report', type: 'writing', status: 'completed' } })
  // Slugified title doubles as a stable artifact id.
  return { id: title.toLowerCase().replace(/\s+/g, '-'), title, kind: 'text' as const, content: pieces.join('\n') }
}
// Orchestrator: clarify → brief → research → report. `requestId` is accepted
// for parity with the full implementation but unused in this compact version.
export async function runDeepResearcher({ requestId, messages, config, dataStream }: { requestId: string; messages: ModelMessage[]; config: Config; dataStream: StreamWriter }) {
  const clarify = await clarifyWithUser(messages, config)
  // The `in` guard narrows the union returned by clarifyWithUser (the
  // short-circuit branch has no `question` key), so the previous `as any`
  // casts are unnecessary.
  if (clarify.need_clarification && 'question' in clarify && clarify.question) {
    return { type: 'clarifying_question' as const, data: clarify.question }
  }
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Starting research', type: 'started', timestamp: Date.now() } })
  const brief = await writeResearchBrief(messages, config, dataStream)
  const notes = await executeResearch(brief.research_brief, config, dataStream)
  const report = await finalReport({ research_brief: brief.research_brief, findings: notes.compressed_research, config, dataStream, title: brief.title })
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Research complete', type: 'completed', timestamp: Date.now() } })
  return { type: 'report' as const, data: report }
}
// AI SDK tool wrapper over the orchestrator. Takes no input — it operates on
// the conversation captured at construction time.
export function deepResearch({ dataStream, messageId, messages, config }: { dataStream: StreamWriter; messageId: string; messages: ModelMessage[]; config: Config }) {
  return tool({
    description: 'Deep research: clarify → research → report',
    inputSchema: z.object({}),
    execute: async () => {
      const res = await runDeepResearcher({ requestId: messageId, messages, config, dataStream })
      if (res.type === 'report') return { ...res.data, format: 'report' as const }
      // After the guard above `res` is narrowed to the clarifying_question
      // variant, so the old `(res as any).data` cast was unnecessary.
      return { answer: res.data, format: 'clarifying_questions' as const }
    },
  })
}
These options are read throughout the workflow (see configuration.ts, prompts.ts, and utils.ts references in the implementation):
- Models: `research_model`, `compression_model`, `final_report_model`
- Token budgets: `research_model_max_tokens`, `compression_model_max_tokens`, `final_report_model_max_tokens`
- Concurrency & limits: `max_concurrent_research_units`, `max_researcher_iterations`
- Clarification: `allow_clarification`
- Tools: `search_api_max_queries`, `mcp_prompt` (for MCP tools)
Example override:
import { createDeepResearchConfig } from '@/lib/ai/tools/deep-research/configuration'
const config = createDeepResearchConfig({
allow_clarification: true,
max_concurrent_research_units: 3,
max_researcher_iterations: 6,
research_model: 'gpt-4o',
compression_model: 'gpt-4o-mini',
final_report_model: 'gpt-4o',
search_api_max_queries: 8,
})- When the tool returns
`format: 'clarifying_questions'`, display the question and re-run after the user responds.
- While running, show entries from `dataStream` to provide live progress (brief, subtasks, compression, report).
- When the tool returns `format: 'report'`, render `data.content` as the final text report and surface `data.title`.
- If `format: 'problem'`, show the error string.
The following is a compact, working implementation that mirrors the three phases (clarify → research → report) and the Supervisor/Researcher agents. It removes logging/telemetry and keeps the core control flow intact.
import { generateObject, generateText, tool, type ModelMessage } from 'ai'
import { z } from 'zod'
// Minimal stream writer used by the guide; adapt to your app's streaming infra
export type StreamWriter = { write: (part: any) => void }
// Models are passed directly; use any AI SDK provider (e.g. openai('gpt-4o'))
export type DeepResearchConfig = {
research_model: any
compression_model: any
final_report_model: any
// When true, the workflow may return a single clarifying question before researching.
allow_clarification: boolean
// Declared fan-out limits; the simplified supervisor below runs a single unit,
// so these appear informational here — TODO confirm before relying on them.
max_concurrent_research_units: number
max_researcher_iterations: number
}
// Structured outputs
// Clarification decision: whether to ask, and the (optional) question text.
const ClarifySchema = z.object({
need_clarification: z.boolean(),
question: z.string().optional(),
})
// Research brief: a concise title plus the brief body.
const BriefSchema = z.object({
title: z.string(),
research_brief: z.string(),
})
// Agents
// Base class giving every agent shared access to config and the progress stream.
abstract class AgentBase {
constructor(protected config: DeepResearchConfig, protected dataStream: StreamWriter) {}
}
class ResearcherAgent extends AgentBase {
  // Newline-joins message contents, serializing non-string parts as JSON.
  private static joinContents(messages: ModelMessage[]): string {
    return messages
      .map(m => (typeof m.content === 'string' ? m.content : JSON.stringify(m.content)))
      .join('\n')
  }

  /**
   * Runs one research pass on `topic`, then compresses the raw transcript into
   * a concise summary. Returns the compressed notes plus the raw material.
   */
  async executeResearch(topic: string): Promise<{ compressed_research: string; raw_notes: string[]; messages: ModelMessage[] }> {
    this.dataStream.write({ type: 'data-researchUpdate', data: { title: 'Starting research on topic', message: topic, type: 'thoughts', status: 'completed' } })
    const researchPass = await generateText({
      model: this.config.research_model,
      messages: [
        { role: 'system', content: 'You are a meticulous web researcher. Use tools if available, cite sources.' },
        { role: 'user', content: topic },
      ],
    })
    const compressionPass = await generateText({
      model: this.config.compression_model,
      messages: [
        { role: 'system', content: 'Compress the following research into a concise, factual summary with citations preserved. Avoid speculation.' },
        ...researchPass.response.messages,
        { role: 'user', content: 'Provide a tight summary of findings.' },
      ],
    })
    this.dataStream.write({ type: 'data-researchUpdate', data: { title: 'Research summarized', type: 'thoughts', status: 'completed' } })
    return {
      compressed_research: ResearcherAgent.joinContents(compressionPass.response.messages),
      raw_notes: [ResearcherAgent.joinContents(researchPass.response.messages)],
      messages: researchPass.response.messages,
    }
  }
}
class SupervisorAgent extends AgentBase {
private researcher = new ResearcherAgent(this.config, this.dataStream)
async run(researchBrief: string): Promise<{ notes: string[] }> {
this.dataStream.write({ type: 'data-researchUpdate', data: { title: 'Coordinating research tasks', type: 'thoughts', status: 'completed' } })
// Simplified strategy: one focused research unit using the full brief
// In a more advanced version, split brief into sub-topics and fan out
const result = await this.researcher.executeResearch(researchBrief)
return { notes: [result.compressed_research] }
}
}
// Phase 1: Clarify
/** Asks the research model whether the conversation needs one clarifying question. */
export async function clarifyWithUser(messages: ModelMessage[], config: DeepResearchConfig) {
  const prompt: ModelMessage[] = [
    { role: 'system', content: 'Determine if user intent is ambiguous. Ask exactly one clarifying question only if needed.' },
    { role: 'user', content: JSON.stringify(messages) },
  ]
  const { object } = await generateObject({ model: config.research_model, schema: ClarifySchema, messages: prompt })
  return object
}
// Phase 2a: Brief
/** Drafts a concise title + research brief, streaming progress around the call. */
export async function writeResearchBrief(messages: ModelMessage[], config: DeepResearchConfig, dataStream: StreamWriter) {
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing research brief', type: 'writing', status: 'running' } })
  const prompt: ModelMessage[] = [
    { role: 'system', content: 'Draft a short research brief and a concise title capturing the user’s request.' },
    { role: 'user', content: JSON.stringify(messages) },
  ]
  const { object: brief } = await generateObject({ model: config.research_model, schema: BriefSchema, messages: prompt })
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing research brief', type: 'writing', status: 'completed', message: brief.research_brief } })
  return brief
}
// Phase 3: Report
/**
 * Synthesizes all accumulated notes into a single structured report artifact,
 * streaming a 'writing' progress update while the report is generated.
 */
export async function finalReportGeneration({
  research_brief,
  findings,
  config,
  dataStream,
  title,
}: {
  research_brief: string
  findings: string
  config: DeepResearchConfig
  dataStream: StreamWriter
  title: string
}) {
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing final report', type: 'writing', status: 'running' } })
  const messages: ModelMessage[] = [
    { role: 'system', content: 'Write a well-structured, well-cited research report. Be concise and precise.' },
    { role: 'user', content: `Brief:\n${research_brief}\n\nFindings:\n${findings}` },
  ]
  const result = await generateText({ model: config.final_report_model, messages })
  // Use the SDK's aggregated `text` output. The previous approach serialized
  // `response.messages` with JSON.stringify, which leaks message-part JSON
  // (e.g. [{"type":"text",...}]) into the report body.
  const content = result.text
  dataStream.write({ type: 'data-researchUpdate', data: { title: 'Writing final report', type: 'writing', status: 'completed' } })
  // Slugified title doubles as a stable artifact id.
  return { id: title.toLowerCase().replace(/\s+/g, '-'), title, kind: 'text' as const, content }
}
// Orchestrator
export async function runDeepResearcher({
requestId,
messages,
config,
dataStream,
}: {
requestId: string
messages: ModelMessage[]
config: DeepResearchConfig
dataStream: StreamWriter
}): Promise<
| { type: 'clarifying_question'; data: string }
| { type: 'report'; data: { id: string; title: string; kind: 'text'; content: string } }
> {
if (config.allow_clarification) {
const clarify = await clarifyWithUser(messages, config)
if (clarify.need_clarification && clarify.question) {
return { type: 'clarifying_question', data: clarify.question }
}
}
dataStream.write({ type: 'data-researchUpdate', data: { title: 'Starting research', type: 'started', timestamp: Date.now() } })
const brief = await writeResearchBrief(messages, config, dataStream)
const supervisor = new SupervisorAgent(config, dataStream)
const supervision = await supervisor.run(brief.research_brief)
const report = await finalReportGeneration({
research_brief: brief.research_brief,
findings: supervision.notes.join('\n'),
config,
dataStream,
title: brief.title,
})
dataStream.write({ type: 'data-researchUpdate', data: { title: 'Research complete', type: 'completed', timestamp: Date.now() } })
return { type: 'report', data: report }
}
// AI SDK tool wrapper for the orchestrator
/** Wraps runDeepResearcher as a zero-input AI SDK tool bound to one conversation. */
export function deepResearch({ dataStream, messageId, messages, config }: {
  dataStream: StreamWriter
  messageId: string
  messages: ModelMessage[]
  config: DeepResearchConfig
}) {
  return tool({
    description: 'Deep research: clarify → research → report',
    inputSchema: z.object({}),
    execute: async () => {
      const res = await runDeepResearcher({ requestId: messageId, messages, config, dataStream })
      if (res.type === 'report') return { ...res.data, format: 'report' as const }
      // `res` is narrowed to the 'clarifying_question' variant here thanks to
      // runDeepResearcher's explicit return type — the old `(res as any).data`
      // cast was both unnecessary and unsafe.
      return { answer: res.data, format: 'clarifying_questions' as const }
    },
  })
}