Skip to content

Instantly share code, notes, and snippets.

@srkirkland
Created September 12, 2025 16:49
Show Gist options
  • Save srkirkland/f9a11f59411c707a9f15fa5655d45c85 to your computer and use it in GitHub Desktop.
Save srkirkland/f9a11f59411c707a9f15fa5655d45c85 to your computer and use it in GitHub Desktop.
Example of using the Box API to search for files and then get back relevant chunks for the LLM.
import { createOpenAI } from '@ai-sdk/openai';
import { generateObject, Tool, UIMessageStreamWriter } from 'ai';
import { BoxClient, BoxDeveloperTokenAuth } from 'box-typescript-sdk-gen';
import { FileFull } from 'box-typescript-sdk-gen/lib/schemas/fileFull.generated';
import { LangfuseTraceClient } from 'langfuse';
import { z } from 'zod/v3';
import {
llmChatProvider,
llmSmallChatProvider,
} from '@/shared/services/chatService';
// Tool identifier under which this tool is registered with the chat runtime.
const toolName = 'getBoxInformation';
// Capability summary shown to the model when it decides whether to call the tool.
const toolDescription =
'Get contextual information from Box cloud storage documents';
// Dependencies injected into each tool invocation.
// NOTE(review): streamingData and trace are accepted but not read by
// toolExecution below — presumably reserved for progress updates and
// telemetry; confirm before removing.
type ToolExecutionProps = {
prompt: string;
assistantSlug: string;
streamingData: UIMessageStreamWriter | null;
trace: LangfuseTraceClient;
};
// Define the input parameters structure
// (the model is asked to pre-digest the user's question into search keywords).
const toolParams = z.object({
prompt: z
.string()
.describe(
"Break down the user's question into discrete relevant keywords (i.e. a good google search term). Can use 'AND', 'OR', and 'NOT' to separate terms as needed."
),
});
const getBoxClient = () => {
const hardCodedToken = 'devtoken';
let auth = new BoxDeveloperTokenAuth({ token: hardCodedToken });
let client = new BoxClient({ auth });
return client;
};
/**
 * Search Box for files matching the prompt, scoped to the given folders.
 *
 * Generalized from hard-coded values (per the original TODO) while keeping
 * the one-argument call signature backward compatible.
 *
 * @param prompt search query; supports 'AND', 'OR', 'NOT' operators
 * @param parentFolderIds folders to scope the search to — default is the
 *   prototype folder until assistant-level configuration exists
 * @param limit maximum number of results to return
 */
const getMatchingFiles = async (
  prompt: string,
  // TODO: get from assistant
  parentFolderIds: string[] = ['51019963404'],
  limit = 10
) => {
  const client = getBoxClient();
  // Use the client to search for files matching the prompt
  return await client.search.searchForContent({
    query: prompt,
    type: 'file',
    ancestorFolderIds: parentFolderIds,
    limit,
    // can add content_types if we want to limit to specific search areas
  });
};
// Define the tool function
async function toolExecution({
prompt,
assistantSlug,
streamingData,
trace,
}: ToolExecutionProps) {
console.log('BOX TOOL EXECUTION', { prompt, assistantSlug });
const boxClient = await getBoxClient();
// TOOD:
// 1. take prompt and find top 10 matching box docs using /search
// 2. foreach grab fileID and versionID, then plug into https://dl.boxcloud.com/api/2.0/internal_files/[fid]/versions/[vid]/representations/extracted_text/content/
// 3. shove all content into mini model and ask for matches
// 3a. response should include sourceId and array of passages
// 4. return it
// 1.
const searchResults = await getMatchingFiles(prompt);
console.log(
'BOX SEARCH RESULTS',
searchResults.entries?.map((e) => {
return (e as FileFull).name;
})
);
if (!searchResults.entries || searchResults.totalCount === 0) {
return `No matching documents found in Box for the query: "${prompt}"`;
}
// 2.
const fileContents = await Promise.all(
searchResults.entries.map(async (file) => {
if (file.type !== 'file') {
return null;
}
const fileFull = file as FileFull;
try {
const fileID = fileFull.id;
const fileVersion = fileFull.fileVersion?.id;
// now call url
const contentUrl = `https://dl.boxcloud.com/api/2.0/internal_files/${fileID}/versions/${fileVersion}/representations/extracted_text/content/`;
const authHeader = await boxClient.auth.retrieveAuthorizationHeader();
const contentResponse = await fetch(contentUrl, {
headers: {
Authorization: authHeader,
},
});
if (contentResponse.status !== 200) {
console.error(
`Failed to fetch content for file ID ${fileFull.id}, status: ${contentResponse.status}`
);
return null;
}
// get the text content
const textContent = await contentResponse.text();
return {
id: fileFull.id,
name: fileFull.name,
content: textContent,
};
} catch (error) {
console.error(
`Error fetching content for file ID ${fileFull.id}:`,
error
);
return null;
}
})
);
// filter out nulls
const validFileContents = fileContents.filter((f) => !!f);
// grab first 500K characters of each document to keep prompt size down
validFileContents.forEach((f) => {
if (f && f.content.length > 500000) {
f.content = f.content.substring(0, 500000);
}
});
console.log(
'RETRIEVED FILE CONTENTS',
validFileContents.map((f) => f?.name)
);
// 3. mini model
const ChunkedResultSchema = z.object({
id: z.number().describe('The ID of the document'),
name: z.string().describe('The name of the document'),
content: z.string().describe('The content snippet from the document'),
});
const ChunkedResultsSchema = z.object({
rankings: z
.array(ChunkedResultSchema)
.describe('list of the top matching chunks of content'),
});
try {
const modelPrompt =
`You will be given large blocks of text from up to 10 documents. Find relevant passages that match the search query. Return the top 20 most relevant passages, along with the document ID they came from. If no relevant passages are found, return an empty list.\n\n` +
`Search Query: """` +
prompt +
`"""\n\n` +
`Documents and Text Blocks:\n"""` +
validFileContents
.map(
(doc) =>
`Document ID: ${doc.id}\nDocument Name: ${doc.name}\nContent: ${doc.content}\n---\n`
)
.join('') +
`\n"""`;
const openai = createOpenAI({
apiKey: 'sk-goeshere',
});
const { object } = await generateObject({
model: openai('gpt-5-mini'),
schema: ChunkedResultsSchema,
prompt: modelPrompt,
temperature: 0.1,
});
console.log('BOX TOOL RESULT', object);
// 4.
return object;
} catch (e) {
console.error('Error during LLM processing in Box tool:', e);
return `An error occurred while processing the documents. Please try again later.`;
}
}
/**
 * Factory that packages the Box search tool for the chat runtime.
 *
 * Returns a one-entry record keyed by the tool name so it can be spread
 * directly into the runtime's tool registry. The injected dependencies
 * (assistant slug, stream writer, trace) are closed over and forwarded to
 * every execution.
 */
export const getBoxInformationTool = (deps: {
  assistantSlug: string;
  streamingData: UIMessageStreamWriter | null;
  trace: LangfuseTraceClient;
}): Record<string, Tool> => {
  const { assistantSlug, streamingData, trace } = deps;
  const boxTool: Tool = {
    description: toolDescription,
    inputSchema: toolParams,
    execute: async ({ prompt }) =>
      toolExecution({ prompt, assistantSlug, streamingData, trace }),
  };
  return { [toolName]: boxTool };
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment