Skip to content

Instantly share code, notes, and snippets.

@gwpl
Last active November 4, 2024 14:43
Show Gist options
  • Save gwpl/8df5b21ba8862ae5ff961a259ec8f96c to your computer and use it in GitHub Desktop.
Save gwpl/8df5b21ba8862ae5ff961a259ec8f96c to your computer and use it in GitHub Desktop.
wiki_mini_rag ell.tools example and task! of giving LLM tools to search and read wikipedia using ell ( llm_lottery mentioned in task: https://gist.github.com/gwpl/34d94e5dfe8062e9570e9fbb76ef71fb ) . Also in PR: https://github.com/MadcowD/ell/pull/364
#!/usr/bin/env python3
# Educational Example: Using LLM with Wikipedia Tools
#
# This script demonstrates how to use an LLM agent with tools to interact with Wikipedia.
# It provides two main functionalities:
# * Searching Wikipedia for relevant pages.
# * Fetching and reading the content of a Wikipedia page.
#
# The script uses `lynx --dump` to obtain a textual representation of web pages.
#
# Workflow:
# 1. The AI agent searches Wikipedia based on user queries and suggests a page to read.
# * Agent keeps searching, trying different kewords, until sees some promising result.
# 2. The agent uses tool to read the page content to answer the user's query.
#
# This example is designed to be educational and is suitable for inclusion in an `examples/` directory.
# It illustrates the integration of LLMs with external tools to perform complex tasks.
#
# For those interested in understanding the inner workings, try running the script with the `-v` or `-vv` flags.
# These flags will provide additional insights into the process and can be very helpful for learning purposes.
#
# Bonus Task: Consider looking at `llm_lottery.py` which uses `loop_llm_and_tools` to allow the LLM to call tools
# as long as needed to accomplish a task. Think about how you might modify this script to search and read Wikipedia
# pages until it knows enough to provide a sufficient response to a query, such as a comparison between multiple cities.
# (spoiler, example solution: https://gist.github.com/gwpl/27715049d41ec829f21014f3b243850a )
import argparse
import subprocess
import sys
from functools import partial
import urllib.parse
import ell
from ell import Message
from pydantic import Field
eprint = partial(print, file=sys.stderr)
VERBOSE = False
@ell.tool(strict=True)
def search_wikipedia(keywords: str = Field(description="Kewords for wikipedia search engine.")):
"""Search Wikipedia and return a list of search results and links."""
if VERBOSE:
eprint(f"Calling tool: search_wikipedia('{keywords}')")
encoded_query = urllib.parse.quote(keywords)
cmd = f"lynx --dump 'https://en.m.wikipedia.org/w/index.php?search={encoded_query}'"
result = subprocess.run(cmd, shell=True, capture_output=True)
return result.stdout.decode('ISO-8859-1')[:65536]
@ell.tool(strict=True)
def wikipedia_page_content(wiki_page_url: str):
"""Fetch the content of a Wikipedia page given its URL."""
if VERBOSE:
eprint(f"Calling tool: wikipedia_page_content('{wiki_page_url}')")
cmd = f"lynx --dump '{wiki_page_url}'"
result = subprocess.run(cmd, shell=True, capture_output=True)
return result.stdout.decode('ISO-8859-1')[:65536]
@ell.complex(model="gpt-4o-mini", tools=[search_wikipedia])
def search_wikipedia_and_suggest_page_to_read(message_history: list[Message]) -> list[Message]:
if VERBOSE:
last_msg = message_history[-1].text
if len(last_msg) > 100:
last_msg = last_msg[:100] + "..."
eprint(f"Calling LMP: search_wikipedia_and_suggest_page_to_read('{last_msg}')")
return [
ell.system("You are an AI assistant that searches Wikipedia and suggests URL of wikipedia page to read. Return ONLY URL. Call tool that takes keywords for Wikipedia search engine, to get wikipedia search engine results. Based on results suggest one best, most promisting/relevent URL to read.")
] + message_history
@ell.complex(model="gpt-4o", tools=[wikipedia_page_content])
def answer_query_by_reading_wikipedia_page(message_history: list[Message]) -> list[Message]:
if VERBOSE:
last_msg = message_history[-1].text
eprint(f"Calling LMP: answer_query_by_reading_wikipedia_page({last_msg})")
return [
ell.system("You are an AI assistant that reads a Wikipedia page and provides answers based on the information found. Read the content of the Wikipedia page at provided URL. Call tool to fetch content of page. Use the content to provide a comprehensive answer to user query. Include quotes on which answer is based on.")
] + message_history
def loop_llm_and_tools(f, message_history, max_iterations=10):
iteration = 0
while iteration < max_iterations:
response_message = f(message_history)
message_history.append(response_message)
if response_message.tool_calls:
tool_call_response = response_message.call_tools_and_collect_as_message()
message_history.append(tool_call_response)
else:
break
iteration += 1
return message_history
def main():
parser = argparse.ArgumentParser(description='Search Wikipedia and answer questions.')
parser.add_argument('query', type=str, help='The query to search for on Wikipedia')
parser.add_argument('-v', '--verbose', action='count', default=0, help='Increase verbosity level')
args = parser.parse_args()
global VERBOSE
VERBOSE = args.verbose > 0
ell.init(verbose=(args.verbose > 1), store='./logdir', autocommit=True)
if args.verbose > 0:
eprint(f"Provided Query: {args.query}")
message_history = []
message_history.append(ell.user(args.query))
if args.verbose > 1:
eprint(f"message_history as early stage = {message_history}")
message_history = loop_llm_and_tools(search_wikipedia_and_suggest_page_to_read, message_history)
url = message_history[-1].text
eprint(f"Intermediate Result: {url}")
message_history = loop_llm_and_tools(answer_query_by_reading_wikipedia_page, message_history)
eprint("Result:")
print(f"{message_history[-1].text}\n")
if __name__ == "__main__":
main()
# This script does the following:
#
# 1. Defines two tools: `search_wikipedia` and `wikipedia_page_content` using the `@ell.tool()` decorator.
# These tools use the `lynx` command-line browser to fetch search results and page content from Wikipedia.
#
# 2. Implements two complex functions using the `@ell.complex` decorator:
# - `search_wikipedia_and_suggest_page_to_read`: Searches Wikipedia and suggests a page URL.
# - `answer_query_by_reading_wikipedia_page`: Reads a Wikipedia page and answers the user's query.
#
# 3. The `main` function sets up argument parsing, initializes ell with the appropriate verbosity, and manages the
# interaction loop using `loop_llm_and_tools` to process the query and fetch results.
#
# 4. The script supports two levels of verbosity:
# - With `-v`, it prints progress information to stderr.
# - With `-vv`, it also enables verbosity for ell.init.
#
# 5. Finally, it prints the intermediate URL result and the final answer based on the Wikipedia page content.
#
# To use this script, you would run it from the command line like this:
#
# ```
# python3 wikipedia_mini_rag.py "Your query here" -v
# ```
#
# Make sure you have the `lynx` command-line browser installed on your system for this script to work properly.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment