Created
November 12, 2024 01:53
-
-
Save thesephist/8fa48887e3a6bdb5b9a1a941eec5f28d to your computer and use it in GitHub Desktop.
Polymer LM programming API example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Implementation of a naïve retrieval-based question answering LLM program.
 */
import { | |
Document, | |
formatPromptLines, | |
getCompletionImpl, | |
loggers, | |
polymer, | |
retrieveImpl, | |
Step, | |
streamArray, | |
} from "../src"; | |
/** Input record for the Q&A program: carries the user's query string. */
type Request = {
  query: string;
};

/** Output record for the Q&A program: carries the generated response string. */
type Response = {
  response: string;
};
/**
 * A naïve retrieval-based question answering pipeline.
 *
 * The step accepts a `Request` (the user's query) and produces a `Response`.
 * Annotating it as `Step<Request, Response>` states that contract at the top
 * level, so the compiler checks each stage of the chain below against it.
 */
const basicQnAStep: Step<Request, Response> = polymer
  .assert<Request>()
  // Stage 1: add a `rewritten_query` field to the input record. The pipeline
  // accumulates context on the record as it goes, so the user's original
  // `query` is still available later when the final answer is generated.
  .extend({
    rewritten_query: polymer
      .completion("rewrite_query", {
        model: "gpt-4o-mini",
        renderPrompt: ({ query }: Request) => {
          const instructions = formatPromptLines(
            `Rewrite the query "${query}" in a way that is more likely to return relevant results.`,
            "Enclose your final response in <query>...</query>.",
          );
          return [{ role: "user", content: instructions }];
        },
      })
      // Operations such as extracting delimited output are applied to the
      // completion's output stream: keep only what the model wrote between
      // the <query> tags, then gather the stream into a single value.
      .capture("<query>", "</query>") // Yields `AsyncStream<Chunk>`
      .collect(), // Yields `string`
  })
  // Development aid: log the intermediate `rewritten_query` value.
  .debug("rewritten_query")
  // Stage 2: run a web search using the rewritten query and store the
  // retrieved documents on the record.
  .extend({
    web_search_results: polymer.retrieve(
      "web_search",
      ({ rewritten_query }) => ({
        source: "web_search",
        query: rewritten_query,
      }),
    ),
  })
  .debug("web_search_results")
  // Stage 3: answer the user's original question, supplying the search
  // results to the model in the system message.
  .completion("generate_answer", {
    model: "gpt-4o-mini",
    renderPrompt: (record: {
      query: string;
      web_search_results: Document[];
    }) => {
      // One tag per retrieved document, labelled with the document's id.
      const resultTags = record.web_search_results.map(
        (doc) =>
          `<search-result url="${doc.id}">${doc.content}</search-result>`,
      );
      const systemPrompt = formatPromptLines(
        "Answer the user's question based on any relevant web search results you see below.",
        "",
        "<results>",
        ...resultTags,
        "</results>",
      );
      return [
        { role: "system", content: systemPrompt },
        { role: "user", content: record.query },
      ];
    },
  })
  .collect()
  // Wrap the collected answer in the `Response` shape; the annotation on
  // `basicQnAStep` makes the compiler verify this final output type.
  .map("format_response", (answer) => ({ response: answer }));
// Entry point: send one example question through `basicQnAStep` and print
// every result it yields.
void (async () => {
  const questions = [{ query: "Who won the Paris Olympics?" }];

  // Steps are stream processors: they take an input stream and yield a stream
  // of results, which makes it easy to push large volumes of data through the
  // pipeline concurrently.
  const inputStream = streamArray(questions);

  // Use the basic default completion and retrieval implementations here.
  // Other implementations (for example, ones supporting caching and max
  // concurrency) can be swapped in through these same options.
  const getCompletion = await getCompletionImpl.main();
  const retrieve = await retrieveImpl.main();

  const results = basicQnAStep.stream(inputStream, {
    getCompletion,
    retrieve,
    logger: loggers.consoleLog,
  });

  for await (const result of results) {
    console.log(result);
  }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment