|
#!/usr/bin/env python3 |
|
|
|
# Educational Example: Multi-step Wikipedia Search and Read |
|
# |
|
# This script demonstrates how to use an LLM agent with tools to interact with Wikipedia in a multi-step process. |
|
# It allows the LLM to search and read Wikipedia pages iteratively until it gathers enough information to answer a query. |
|
# |
|
# The script uses `lynx --dump` to obtain a textual representation of web pages. |
|
# |
|
# Workflow: |
|
# 1. The AI agent searches Wikipedia based on user queries. |
|
# 2. It reads the content of suggested Wikipedia pages. |
|
# 3. The process repeats until the agent has sufficient information to answer the user's query. |
|
# |
|
# This example is designed to be educational and is suitable for inclusion in an `examples/` directory. |
|
# It illustrates the integration of LLMs with external tools to perform complex tasks. |
|
# |
|
# For those interested in understanding the inner workings, try running the script with the `-v` or `-vv` flags. |
|
# These flags will provide additional insights into the process and can be very helpful for learning purposes. |
|
|
|
import sys |
|
import argparse |
|
import subprocess |
|
import urllib.parse |
|
from functools import partial |
|
|
|
import ell |
|
from ell import Message |
|
from pydantic import Field |
|
|
|
# Module-wide switch for progress messages; main() turns it on when -v is given.
VERBOSE = False

# print() variant that writes to stderr, keeping stdout free for the final answer.
eprint = partial(print, file=sys.stderr)
|
|
|
@ell.tool(strict=True)
def search_wikipedia(keywords: str = Field(description="Keywords for Wikipedia search engine.")):
    """Search Wikipedia and return a list of search results and links."""
    # NOTE(review): the docstring above is kept verbatim because it presumably
    # doubles as the tool description shown to the model -- confirm before editing.
    if VERBOSE:
        eprint(f"Calling tool: search_wikipedia('{keywords}')")

    # Percent-encode the keywords so they form a single query-string value.
    encoded_query = urllib.parse.quote(keywords)
    search_url = f"https://en.m.wikipedia.org/w/index.php?search={encoded_query}"

    # Run lynx with an argument list instead of a shell command line: the URL
    # reaches lynx as exactly one argument and no shell quoting is involved.
    try:
        result = subprocess.run(["lynx", "--dump", search_url], capture_output=True)
    except FileNotFoundError:
        # Report the problem as tool output rather than crashing the agent loop.
        return "Error: the 'lynx' executable was not found on PATH."

    # ISO-8859-1 maps every byte to a character, so decoding cannot raise.
    # The result is capped at 65536 characters to bound what is sent to the model.
    return result.stdout.decode('ISO-8859-1')[:65536]
|
|
|
@ell.tool(strict=True)
def wikipedia_page_content(wiki_page_url: str):
    """Fetch the content of a Wikipedia page wiki_page_url URL."""
    # NOTE(review): the docstring above is kept verbatim because it presumably
    # doubles as the tool description shown to the model -- confirm before editing.
    if VERBOSE:
        eprint(f"Calling tool: wikipedia_page_content('{wiki_page_url}')")

    # The URL is chosen by the model, so treat it as untrusted input.
    # A leading "-" would be read by lynx as a command-line option, not a URL.
    if wiki_page_url.startswith("-"):
        return "Error: wiki_page_url must be a URL, not a command-line option."

    # Run lynx with an argument list instead of a shell command line. Titles
    # containing an apostrophe (e.g. ".../Schr%C3%B6dinger's_cat") used to break
    # the single-quoted shell string and could inject arbitrary shell commands.
    try:
        result = subprocess.run(["lynx", "--dump", wiki_page_url], capture_output=True)
    except FileNotFoundError:
        # Report the problem as tool output rather than crashing the agent loop.
        return "Error: the 'lynx' executable was not found on PATH."

    # ISO-8859-1 maps every byte to a character, so decoding cannot raise.
    # The result is capped at 65536 characters to bound what is sent to the model.
    return result.stdout.decode('ISO-8859-1')[:65536]
|
|
|
@ell.complex(model="gpt-4o", tools=[search_wikipedia, wikipedia_page_content])
def search_and_read_until_sufficient(message_history: list[Message]) -> list[Message]:
    # Builds the prompt for one LLM turn: a fixed system instruction followed by
    # the conversation so far. Both Wikipedia tools are registered on the decorator.
    if VERBOSE:
        # Log at most the first 100 characters of the newest message.
        preview = message_history[-1].text
        preview = preview if len(preview) <= 100 else preview[:100] + "..."
        eprint(f"Calling LMP: search_and_read_until_sufficient('{preview}')")

    system_prompt = ell.system("You are an AI assistant that searches and reads Wikipedia pages until you have enough information to answer the user's query. Use the search tool to find relevant pages and the read tool to gather information. Continue this process until you can provide a comprehensive answer.")
    return [system_prompt] + message_history
|
|
|
def loop_llm_and_tools(f, message_history, max_iterations=100):
    """Alternate between the LLM and its tools until the LLM stops calling tools.

    Args:
        f: Callable that takes the message history and returns a response
            message exposing ``tool_calls`` and
            ``call_tools_and_collect_as_message()``.
        message_history: List of messages so far. It is extended in place with
            every LLM response and every tool-result message.
        max_iterations: Upper bound on the number of calls to ``f``.

    Returns:
        The same ``message_history`` list. Its last entry is the LLM's final
        response, unless ``max_iterations`` ran out first, in which case it is
        the last tool-result message.
    """
    for _ in range(max_iterations):
        response_message = f(message_history)
        message_history.append(response_message)

        # A reply without tool calls is the model's final answer.
        if not response_message.tool_calls:
            break

        # Run the requested tools and feed their output back on the next turn.
        # The tool output is deliberately not scanned for stop phrases: it is
        # raw page text, and a substring match would end the loop before the
        # model saw the result, leaving a tool message as the "answer".
        message_history.append(response_message.call_tools_and_collect_as_message())

    return message_history
|
|
|
def main():
    """Parse the command line, run the search-and-read agent, print its answer.

    The final answer goes to stdout; all diagnostics go to stderr. ``-v``
    enables this script's own progress messages and ``-vv`` additionally
    passes ``verbose=True`` to ``ell.init``.
    """
    global VERBOSE

    cli = argparse.ArgumentParser(description='Search and read Wikipedia pages until sufficient information is gathered.')
    cli.add_argument('query', type=str, help='The query to search for on Wikipedia')
    cli.add_argument('-v', '--verbose', action='count', default=0, help='Increase verbosity level')
    options = cli.parse_args()

    verbosity = options.verbose
    chatty = verbosity > 0
    very_chatty = verbosity > 1

    # Publish the flag for the tool functions and the LMP, which read the global.
    VERBOSE = chatty

    ell.init(verbose=very_chatty, store='./logdir', autocommit=True)

    if chatty:
        eprint(f"Provided Query: {options.query}")

    # The conversation starts with the user's query as its only message.
    conversation = [ell.user(options.query)]

    if very_chatty:
        eprint(f"message_history at early stage = {conversation}")

    conversation = loop_llm_and_tools(search_and_read_until_sufficient, conversation)

    if chatty:
        eprint("Final Result:")
    print(f"{conversation[-1].text}\n")
|
|
|
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()