Skip to content

Instantly share code, notes, and snippets.

@srkirkland
Created September 12, 2025 16:49
Show Gist options
  • Save srkirkland/f9a11f59411c707a9f15fa5655d45c85 to your computer and use it in GitHub Desktop.
Save srkirkland/f9a11f59411c707a9f15fa5655d45c85 to your computer and use it in GitHub Desktop.
Example of using the Box API to search for files and then get back relevant chunks for the LLM.
import { createOpenAI } from '@ai-sdk/openai';
import { generateObject, Tool, UIMessageStreamWriter } from 'ai';
import { BoxClient, BoxDeveloperTokenAuth } from 'box-typescript-sdk-gen';
import { FileFull } from 'box-typescript-sdk-gen/lib/schemas/fileFull.generated';
import { LangfuseTraceClient } from 'langfuse';
import { z } from 'zod/v3';
import {
llmChatProvider,
llmSmallChatProvider,
} from '@/shared/services/chatService';
// Tool identifier under which this tool is registered with the chat runtime.
const toolName = 'getBoxInformation';
// Capability summary shown to the model when it decides whether to call the tool.
const toolDescription =
'Get contextual information from Box cloud storage documents';
// Dependencies injected into each tool invocation.
// NOTE(review): streamingData and trace are accepted but not read by
// toolExecution below — presumably reserved for progress updates and
// telemetry; confirm before removing.
type ToolExecutionProps = {
prompt: string;
assistantSlug: string;
streamingData: UIMessageStreamWriter | null;
trace: LangfuseTraceClient;
};
// Define the input parameters structure
// (the model is asked to pre-digest the user's question into search keywords).
const toolParams = z.object({
prompt: z
.string()
.describe(
"Break down the user's question into discrete relevant keywords (i.e. a good google search term). Can use 'AND', 'OR', and 'NOT' to separate terms as needed."
),
});
const getBoxClient = () => {
const hardCodedToken = 'devtoken';
let auth = new BoxDeveloperTokenAuth({ token: hardCodedToken });
let client = new BoxClient({ auth });
return client;
};
/**
 * Search Box for files matching the prompt, scoped to the given folders.
 *
 * Generalized from hard-coded values (per the original TODO) while keeping
 * the one-argument call signature backward compatible.
 *
 * @param prompt search query; supports 'AND', 'OR', 'NOT' operators
 * @param parentFolderIds folders to scope the search to — default is the
 *   prototype folder until assistant-level configuration exists
 * @param limit maximum number of results to return
 */
const getMatchingFiles = async (
  prompt: string,
  // TODO: get from assistant
  parentFolderIds: string[] = ['51019963404'],
  limit = 10
) => {
  const client = getBoxClient();
  // Use the client to search for files matching the prompt
  return await client.search.searchForContent({
    query: prompt,
    type: 'file',
    ancestorFolderIds: parentFolderIds,
    limit,
    // can add content_types if we want to limit to specific search areas
  });
};
// Define the tool function
async function toolExecution({
prompt,
assistantSlug,
streamingData,
trace,
}: ToolExecutionProps) {
console.log('BOX TOOL EXECUTION', { prompt, assistantSlug });
const boxClient = await getBoxClient();
// TOOD:
// 1. take prompt and find top 10 matching box docs using /search
// 2. foreach grab fileID and versionID, then plug into https://dl.boxcloud.com/api/2.0/internal_files/[fid]/versions/[vid]/representations/extracted_text/content/
// 3. shove all content into mini model and ask for matches
// 3a. response should include sourceId and array of passages
// 4. return it
// 1.
const searchResults = await getMatchingFiles(prompt);
console.log(
'BOX SEARCH RESULTS',
searchResults.entries?.map((e) => {
return (e as FileFull).name;
})
);
if (!searchResults.entries || searchResults.totalCount === 0) {
return `No matching documents found in Box for the query: "${prompt}"`;
}
// 2.
const fileContents = await Promise.all(
searchResults.entries.map(async (file) => {
if (file.type !== 'file') {
return null;
}
const fileFull = file as FileFull;
try {
const fileID = fileFull.id;
const fileVersion = fileFull.fileVersion?.id;
// now call url
const contentUrl = `https://dl.boxcloud.com/api/2.0/internal_files/${fileID}/versions/${fileVersion}/representations/extracted_text/content/`;
const authHeader = await boxClient.auth.retrieveAuthorizationHeader();
const contentResponse = await fetch(contentUrl, {
headers: {
Authorization: authHeader,
},
});
if (contentResponse.status !== 200) {
console.error(
`Failed to fetch content for file ID ${fileFull.id}, status: ${contentResponse.status}`
);
return null;
}
// get the text content
const textContent = await contentResponse.text();
return {
id: fileFull.id,
name: fileFull.name,
content: textContent,
};
} catch (error) {
console.error(
`Error fetching content for file ID ${fileFull.id}:`,
error
);
return null;
}
})
);
// filter out nulls
const validFileContents = fileContents.filter((f) => !!f);
// grab first 500K characters of each document to keep prompt size down
validFileContents.forEach((f) => {
if (f && f.content.length > 500000) {
f.content = f.content.substring(0, 500000);
}
});
console.log(
'RETRIEVED FILE CONTENTS',
validFileContents.map((f) => f?.name)
);
// 3. mini model
const ChunkedResultSchema = z.object({
id: z.number().describe('The ID of the document'),
name: z.string().describe('The name of the document'),
content: z.string().describe('The content snippet from the document'),
});
const ChunkedResultsSchema = z.object({
rankings: z
.array(ChunkedResultSchema)
.describe('list of the top matching chunks of content'),
});
try {
const modelPrompt =
`You will be given large blocks of text from up to 10 documents. Find relevant passages that match the search query. Return the top 20 most relevant passages, along with the document ID they came from. If no relevant passages are found, return an empty list.\n\n` +
`Search Query: """` +
prompt +
`"""\n\n` +
`Documents and Text Blocks:\n"""` +
validFileContents
.map(
(doc) =>
`Document ID: ${doc.id}\nDocument Name: ${doc.name}\nContent: ${doc.content}\n---\n`
)
.join('') +
`\n"""`;
const openai = createOpenAI({
apiKey: 'sk-goeshere',
});
const { object } = await generateObject({
model: openai('gpt-5-mini'),
schema: ChunkedResultsSchema,
prompt: modelPrompt,
temperature: 0.1,
});
console.log('BOX TOOL RESULT', object);
// 4.
return object;
} catch (e) {
console.error('Error during LLM processing in Box tool:', e);
return `An error occurred while processing the documents. Please try again later.`;
}
}
/**
 * Factory that packages the Box search tool for the chat runtime.
 *
 * Returns a one-entry record keyed by the tool name so it can be spread
 * directly into the runtime's tool registry. The injected dependencies
 * (assistant slug, stream writer, trace) are closed over and forwarded to
 * every execution.
 */
export const getBoxInformationTool = (deps: {
  assistantSlug: string;
  streamingData: UIMessageStreamWriter | null;
  trace: LangfuseTraceClient;
}): Record<string, Tool> => {
  const { assistantSlug, streamingData, trace } = deps;
  const boxTool: Tool = {
    description: toolDescription,
    inputSchema: toolParams,
    execute: async ({ prompt }) =>
      toolExecution({ prompt, assistantSlug, streamingData, trace }),
  };
  return { [toolName]: boxTool };
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment