ask-ollama.py - minimal example of consuming a local LLM running on your own machine
#!/usr/bin/env python3
import time
import json
import requests
import psutil
from subprocess import Popen, PIPE
""" | |
This requires you to have `ollama` installed already. Install it via your systems package manager. | |
This also expects you to have already pulled the `llama3.1` model via `ollama pull llama3.1` | |
""" | |
def is_ollama_running() -> bool:
    """Return True if an `ollama` process is currently running."""
    for proc in psutil.process_iter():
        if proc.is_running() and proc.name() == "ollama":
            return True
    return False
def is_ollama_ready() -> bool:
    """Return True once the Ollama HTTP API answers on its default port."""
    try:
        url = "http://localhost:11434"
        r = requests.get(url)
        return r.ok and r.text == "Ollama is running"
    except requests.exceptions.ConnectionError:
        return False
def await_ollama_ready() -> None:
    """Poll until the Ollama API is ready, giving up after ~5 seconds."""
    max_iterations = 10
    iterations = 0
    while not is_ollama_ready():
        if iterations >= max_iterations:
            raise TimeoutError("Ollama did not start in time")
        time.sleep(0.5)
        iterations += 1
def start_ollama() -> None:
    """Launch `ollama serve` in a new session if it isn't already running."""
    if is_ollama_running():
        return
    print("Starting ollama...")
    Popen(["ollama", "serve"], stdout=PIPE, stderr=PIPE, start_new_session=True)
def kill_ollama() -> None:
    """Kill the Ollama server and any model runner processes it spawned."""
    if not is_ollama_running():
        return
    print("Killing ollama...")
    for proc in psutil.process_iter():
        if proc.is_running() and proc.name() in ["ollama", "ollama_llama_server"]:
            proc.kill()
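
# A hedged alternative, not in the original gist: psutil can also ask processes
# to exit gracefully with terminate() (SIGTERM) and escalate to kill() only if
# they linger. psutil.wait_procs() returns the (gone, alive) process lists.
def stop_ollama_gracefully(timeout: float = 5.0) -> None:
    procs = [p for p in psutil.process_iter()
             if p.is_running() and p.name() in ["ollama", "ollama_llama_server"]]
    for p in procs:
        p.terminate()
    _, alive = psutil.wait_procs(procs, timeout=timeout)
    for p in alive:
        p.kill()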
def ask_ollama(prompt: str, model: str = "llama3.1") -> None:
    """Stream a completion from the local Ollama API and print it as it arrives."""
    url = "http://localhost:11434/api/generate"
    payload = {"model": model, "prompt": prompt}
    with requests.post(url, json=payload, stream=True) as r:
        for line in r.iter_lines():
            if not line:  # iter_lines() can yield empty keep-alive lines
                continue
            resp = json.loads(line.decode("utf-8"))
            text = resp["response"]
            if text in ("\n", "\r"):
                print()
            else:
                print(text, end="")
        print()
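
# A hedged variant, not in the original gist: the same endpoint accepts
# "stream": False, which returns the whole completion as one JSON object
# instead of newline-delimited chunks.
def ask_ollama_blocking(prompt: str, model: str = "llama3.1") -> str:
    url = "http://localhost:11434/api/generate"
    payload = {"model": model, "prompt": prompt, "stream": False}
    r = requests.post(url, json=payload)
    r.raise_for_status()
    return r.json()["response"]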
if __name__ == "__main__":
    start_ollama()
    await_ollama_ready()
    ask_ollama("Write me a function that outputs the fibonacci sequence")
    kill_ollama()
Quickstart
I'm sure there are better ways, but this works, takes no time to build a basic abstraction over (see the sketch below), and doesn't surprise me.
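As an example of such an abstraction, here is a minimal sketch (not from the original gist) of a context manager wrapping the helpers above. It assumes the script is saved as ask_ollama.py (underscore, so it is importable); the OllamaSession name is hypothetical.

# Sketch only; assumes the gist is saved as ask_ollama.py on the import path.
from ask_ollama import start_ollama, await_ollama_ready, ask_ollama, kill_ollama

class OllamaSession:
    """Starts Ollama on entry, kills it on exit."""

    def __enter__(self):
        start_ollama()
        await_ollama_ready()
        return self

    def ask(self, prompt: str, model: str = "llama3.1") -> None:
        ask_ollama(prompt, model)

    def __exit__(self, *exc):
        kill_ollama()
        return False

# Usage:
# with OllamaSession() as session:
#     session.ask("Write me a function that outputs the fibonacci sequence")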