davidmezzetti · July 15, 2024 21:08
diff --git a/txtai-llm.py b/txtai-llm.py
 from txtai import LLM

 # Each LLM(...) call below rebinds `llm`; keep only the backend you need.

 # Hugging Face models
 llm = LLM("google/gemma-2-9b")

 # llama.cpp models automatically downloaded from HF HUB
 llm = LLM("bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf")

 # Models served via APIs (OpenAI / Claude / Ollama)
 llm = LLM("gpt-4o")
 llm = LLM("claude-3-5-sonnet")
 llm = LLM("ollama/mistral")

 # Inputs as prompt strings
 llm("Tell me how to solve complex math problems")

 # Inputs as chat messages: a list of dicts, each with "role" and "content" keys
 llm([
  {"role": "system", "content": "You are a helpful assistant"},
  {"role": "user", "content": "List things to do in DC"}
 ])

 # Run as a FastAPI service
 # config.yml:
 #  llm:
 #    path: google/gemma-2-9b

 $ CONFIG=config.yml uvicorn "txtai.api:app"
 $ curl "http://localhost:8000/llm?text=prompt+string"

 # Run as a Docker API service
 $ docker build -t txtai-api --build-arg BASE_IMAGE=neuml/txtai-gpu api/.
 $ docker run -p 8000:8000 txtai-api
	from txtai import LLM

	# Each LLM(...) call below rebinds `llm`; keep only the backend you need.

	# Hugging Face models
	llm = LLM("google/gemma-2-9b")

	# llama.cpp models automatically downloaded from HF HUB
	llm = LLM("bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf")

	# Models served via APIs (OpenAI / Claude / Ollama)
	llm = LLM("gpt-4o")
	llm = LLM("claude-3-5-sonnet")
	llm = LLM("ollama/mistral")

	# Inputs as prompt strings
	llm("Tell me how to solve complex math problems")

	# Inputs as chat messages: a list of dicts, each with "role" and "content" keys
	llm([
	{"role": "system", "content": "You are a helpful assistant"},
	{"role": "user", "content": "List things to do in DC"}
	])

	# Run as a FastAPI service
	# config.yml:
	#  llm:
	#    path: google/gemma-2-9b

	$ CONFIG=config.yml uvicorn "txtai.api:app"
	$ curl "http://localhost:8000/llm?text=prompt+string"

	# Run as a Docker API service
	$ docker build -t txtai-api --build-arg BASE_IMAGE=neuml/txtai-gpu api/.
	$ docker run -p 8000:8000 txtai-api