thesephist · November 12, 2024 01:53
diff --git a/0_basic.ts b/0_basic.ts
 /**
 * Implementation of a naïve retrieval-based question answering LLM program.
 */

 import {
  Document,
  formatPromptLines,
  getCompletionImpl,
  loggers,
  polymer,
  retrieveImpl,
  Step,
  streamArray,
 } from "../src";

 type Request = { query: string };
 type Response = { response: string };

 // Naïve retrieval-based question answering, expressed as a single Polymer step.
 // The `Step<Request, Response>` annotation states the contract up front (a
 // query goes in, a response comes out), and TypeScript checks every stage of
 // the chain below against it.
 const basicQnAStep: Step<Request, Response> = polymer
  .assert<Request>()
  // Stage 1: query rewriting. Context is accumulated onto the input record as
  // it moves through the pipeline, so the user's original `query` remains
  // available for the final answer while `rewritten_query` is added alongside
  // it for use by the search stage.
  .extend({
    rewritten_query: polymer
      .completion("rewrite_query", {
        model: "gpt-4o-mini",
        renderPrompt: (request: Request) => {
          const instructions = formatPromptLines(
            `Rewrite the query "${request.query}" in a way that is more likely to return relevant results.`,
            `Enclose your final response in <query>...</query>.`,
          );
          return [{ role: "user", content: instructions }];
        },
      })
      // Post-processing is applied to the completion's output stream:
      // `.capture()` extracts the delimited rewritten query from the model's
      // reply (an `AsyncStream<Chunk>`), and `.collect()` turns it into a
      // `string`.
      .capture("<query>", "</query>")
      .collect(),
  })
  // `.debug()` logs an intermediate value, which is handy during development.
  .debug("rewritten_query")
  // Stage 2: web search driven by the rewritten query.
  .extend({
    web_search_results: polymer.retrieve("web_search", ({ rewritten_query }) => ({
      source: "web_search",
      query: rewritten_query,
    })),
  })
  .debug("web_search_results")
  // Stage 3: answer the user's original question, with the search results
  // supplied to the model in the system prompt.
  .completion("generate_answer", {
    model: "gpt-4o-mini",
    renderPrompt: ({
      query,
      web_search_results,
    }: {
      query: string;
      web_search_results: Document[];
    }) => {
      // One tag per retrieved document; the document id fills the url attribute.
      const resultTags = web_search_results.map(
        (result) =>
          `<search-result url="${result.id}">${result.content}</search-result>`,
      );
      const systemPrompt = formatPromptLines(
        `Answer the user's question based on any relevant web search results you see below.`,
        ``,
        `<results>`,
        ...resultTags,
        `</results>`,
      );
      return [
        { role: "system", content: systemPrompt },
        { role: "user", content: query },
      ];
    },
  })
  .collect()
  // Wrap the collected answer in the `Response` shape; the annotation on
  // `basicQnAStep` has TypeScript confirm the output type matches.
  .map("format_response", (answer) => ({ response: answer }));

 // Script entry point: runs the example question through the pipeline and
 // prints every result it yields.
 void (async () => {
  const questions = [{ query: "Who won the Paris Olympics?" }];

  // Polymer steps are stream processors: given any input stream they yield a
  // stream of results, which makes it easy to push large volumes of data
  // through the pipeline concurrently and keep the inference or retrieval
  // backend saturated.
  const questionStream = streamArray(questions);

  // The basic default completion and retrieval implementations are used here.
  // Polymer ships with other implementations that can be swapped in,
  // including ones that support caching and max concurrency.
  const getCompletion = await getCompletionImpl.main();
  const retrieve = await retrieveImpl.main();

  const answers = basicQnAStep.stream(questionStream, {
    getCompletion,
    retrieve,
    logger: loggers.consoleLog,
  });
  for await (const answer of answers) {
    console.log(answer);
  }
 })();
	/**
	* Implementation of a naïve retrieval-based question answering LLM program.
	*/

	import {
	Document,
	formatPromptLines,
	getCompletionImpl,
	loggers,
	polymer,
	retrieveImpl,
	Step,
	streamArray,
	} from "../src";

	type Request = { query: string };
	type Response = { response: string };

	// Naïve retrieval-based question answering, expressed as a single Polymer step.
	// The `Step<Request, Response>` annotation states the contract up front (a
	// query goes in, a response comes out), and TypeScript checks every stage of
	// the chain below against it.
	const basicQnAStep: Step<Request, Response> = polymer
	  .assert<Request>()
	  // Stage 1: query rewriting. Context is accumulated onto the input record as
	  // it moves through the pipeline, so the user's original `query` remains
	  // available for the final answer while `rewritten_query` is added alongside
	  // it for use by the search stage.
	  .extend({
	    rewritten_query: polymer
	      .completion("rewrite_query", {
	        model: "gpt-4o-mini",
	        renderPrompt: (request: Request) => {
	          const instructions = formatPromptLines(
	            `Rewrite the query "${request.query}" in a way that is more likely to return relevant results.`,
	            `Enclose your final response in <query>...</query>.`,
	          );
	          return [{ role: "user", content: instructions }];
	        },
	      })
	      // Post-processing is applied to the completion's output stream:
	      // `.capture()` extracts the delimited rewritten query from the model's
	      // reply (an `AsyncStream<Chunk>`), and `.collect()` turns it into a
	      // `string`.
	      .capture("<query>", "</query>")
	      .collect(),
	  })
	  // `.debug()` logs an intermediate value, which is handy during development.
	  .debug("rewritten_query")
	  // Stage 2: web search driven by the rewritten query.
	  .extend({
	    web_search_results: polymer.retrieve("web_search", ({ rewritten_query }) => ({
	      source: "web_search",
	      query: rewritten_query,
	    })),
	  })
	  .debug("web_search_results")
	  // Stage 3: answer the user's original question, with the search results
	  // supplied to the model in the system prompt.
	  .completion("generate_answer", {
	    model: "gpt-4o-mini",
	    renderPrompt: ({
	      query,
	      web_search_results,
	    }: {
	      query: string;
	      web_search_results: Document[];
	    }) => {
	      // One tag per retrieved document; the document id fills the url attribute.
	      const resultTags = web_search_results.map(
	        (result) =>
	          `<search-result url="${result.id}">${result.content}</search-result>`,
	      );
	      const systemPrompt = formatPromptLines(
	        `Answer the user's question based on any relevant web search results you see below.`,
	        ``,
	        `<results>`,
	        ...resultTags,
	        `</results>`,
	      );
	      return [
	        { role: "system", content: systemPrompt },
	        { role: "user", content: query },
	      ];
	    },
	  })
	  .collect()
	  // Wrap the collected answer in the `Response` shape; the annotation on
	  // `basicQnAStep` has TypeScript confirm the output type matches.
	  .map("format_response", (answer) => ({ response: answer }));

	// Script entry point: runs the example question through the pipeline and
	// prints every result it yields.
	void (async () => {
	  const questions = [{ query: "Who won the Paris Olympics?" }];

	  // Polymer steps are stream processors: given any input stream they yield a
	  // stream of results, which makes it easy to push large volumes of data
	  // through the pipeline concurrently and keep the inference or retrieval
	  // backend saturated.
	  const questionStream = streamArray(questions);

	  // The basic default completion and retrieval implementations are used here.
	  // Polymer ships with other implementations that can be swapped in,
	  // including ones that support caching and max concurrency.
	  const getCompletion = await getCompletionImpl.main();
	  const retrieve = await retrieveImpl.main();

	  const answers = basicQnAStep.stream(questionStream, {
	    getCompletion,
	    retrieve,
	    logger: loggers.consoleLog,
	  });
	  for await (const answer of answers) {
	    console.log(answer);
	  }
	})();