Ollama Python Examples
================================================
File: /examples/README.md
================================================

# Running Examples

Run the examples in this directory with:

```sh
# Run example
python3 examples/<example>.py
```
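
The examples assume the `ollama` Python package is installed and an Ollama server is running locally. A typical setup (shown here as an assumption; adjust to your environment) is:

```sh
# Install the Python client (assumes pip is available)
pip install ollama

# Start the Ollama server in a separate terminal if it is not already running
ollama serve
```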
### Chat - Chat with a model
- [chat.py](chat.py)
- [async-chat.py](async-chat.py)
- [chat-stream.py](chat-stream.py) - Streamed outputs
- [chat-with-history.py](chat-with-history.py) - Chat with a model while maintaining the conversation history

### Generate - Generate text with a model
- [generate.py](generate.py)
- [async-generate.py](async-generate.py)
- [generate-stream.py](generate-stream.py) - Streamed outputs
- [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle

### Tools/Function Calling - Call a function with a model
- [tools.py](tools.py) - Simple example of Tools/Function Calling
- [async-tools.py](async-tools.py)

### Multimodal with Images - Chat with a multimodal (image chat) model
- [multimodal-chat.py](multimodal-chat.py)
- [multimodal-generate.py](multimodal-generate.py)

### Structured Outputs - Generate structured outputs with a model
- [structured-outputs.py](structured-outputs.py)
- [async-structured-outputs.py](async-structured-outputs.py)
- [structured-outputs-image.py](structured-outputs-image.py)

### Ollama List - List all downloaded models and their properties
- [list.py](list.py)

### Ollama ps - Show model status with CPU/GPU usage
- [ps.py](ps.py)

### Ollama Pull - Pull a model from Ollama
Requirement: `pip install tqdm`
- [pull.py](pull.py)

### Ollama Create - Create a model from a Modelfile
```sh
python create.py <model> <modelfile>
```
- [create.py](create.py)

See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.

### Ollama Embed - Generate embeddings with a model
- [embed.py](embed.py)
================================================
File: /examples/async-chat.py
================================================
import asyncio

from ollama import AsyncClient


async def main():
  messages = [
    {
      'role': 'user',
      'content': 'Why is the sky blue?',
    },
  ]

  client = AsyncClient()
  response = await client.chat('llama3.2', messages=messages)
  print(response['message']['content'])


if __name__ == '__main__':
  asyncio.run(main())
================================================
File: /examples/async-generate.py
================================================
import asyncio

import ollama


async def main():
  client = ollama.AsyncClient()
  response = await client.generate('llama3.2', 'Why is the sky blue?')
  print(response['response'])


if __name__ == '__main__':
  try:
    asyncio.run(main())
  except KeyboardInterrupt:
    print('\nGoodbye!')
================================================
File: /examples/async-structured-outputs.py
================================================
from pydantic import BaseModel
from ollama import AsyncClient
import asyncio


# Define the schema for the response
class FriendInfo(BaseModel):
  name: str
  age: int
  is_available: bool


class FriendList(BaseModel):
  friends: list[FriendInfo]


async def main():
  client = AsyncClient()
  response = await client.chat(
    model='llama3.1:8b',
    messages=[{'role': 'user', 'content': 'I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format'}],
    format=FriendList.model_json_schema(),  # Use Pydantic to generate the schema
    options={'temperature': 0},  # Make responses more deterministic
  )

  # Use Pydantic to validate the response
  friends_response = FriendList.model_validate_json(response.message.content)
  print(friends_response)


if __name__ == '__main__':
  asyncio.run(main())
================================================
File: /examples/async-tools.py
================================================
import asyncio

from ollama import ChatResponse
import ollama


def add_two_numbers(a: int, b: int) -> int:
  """
  Add two numbers

  Args:
    a (int): The first number
    b (int): The second number

  Returns:
    int: The sum of the two numbers
  """
  return a + b


def subtract_two_numbers(a: int, b: int) -> int:
  """
  Subtract two numbers
  """
  return a - b


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
  'type': 'function',
  'function': {
    'name': 'subtract_two_numbers',
    'description': 'Subtract two numbers',
    'parameters': {
      'type': 'object',
      'required': ['a', 'b'],
      'properties': {
        'a': {'type': 'integer', 'description': 'The first number'},
        'b': {'type': 'integer', 'description': 'The second number'},
      },
    },
  },
}

messages = [{'role': 'user', 'content': 'What is three plus one?'}]
print('Prompt:', messages[0]['content'])

available_functions = {
  'add_two_numbers': add_two_numbers,
  'subtract_two_numbers': subtract_two_numbers,
}


async def main():
  client = ollama.AsyncClient()

  response: ChatResponse = await client.chat(
    'llama3.1',
    messages=messages,
    tools=[add_two_numbers, subtract_two_numbers_tool],
  )

  if response.message.tool_calls:
    # There may be multiple tool calls in the response
    for tool in response.message.tool_calls:
      # Ensure the function is available, and then call it
      if function_to_call := available_functions.get(tool.function.name):
        print('Calling function:', tool.function.name)
        print('Arguments:', tool.function.arguments)
        output = function_to_call(**tool.function.arguments)
        print('Function output:', output)
      else:
        print('Function', tool.function.name, 'not found')

  # Only needed to chat with the model using the tool call results
  if response.message.tool_calls:
    # Add the function response to messages for the model to use
    messages.append(response.message)
    messages.append({'role': 'tool', 'content': str(output), 'name': tool.function.name})

    # Get final response from model with function outputs
    final_response = await client.chat('llama3.1', messages=messages)
    print('Final response:', final_response.message.content)
  else:
    print('No tool calls returned from model')


if __name__ == '__main__':
  try:
    asyncio.run(main())
  except KeyboardInterrupt:
    print('\nGoodbye!')
================================================
File: /examples/chat-stream.py
================================================
from ollama import chat

messages = [
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
]

for part in chat('llama3.2', messages=messages, stream=True):
  print(part['message']['content'], end='', flush=True)

print()
================================================
File: /examples/chat-with-history.py
================================================
from ollama import chat

messages = [
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
  {
    'role': 'assistant',
    'content': "The sky is blue because of the way the Earth's atmosphere scatters sunlight.",
  },
  {
    'role': 'user',
    'content': 'What is the weather in Tokyo?',
  },
  {
    'role': 'assistant',
    'content': 'The weather in Tokyo is typically warm and humid during the summer months, with temperatures often exceeding 30°C (86°F). The city experiences a rainy season from June to September, with heavy rainfall and occasional typhoons. Winter is mild, with temperatures rarely dropping below freezing. The city is known for its high-tech and vibrant culture, with many popular tourist attractions such as the Tokyo Tower, Senso-ji Temple, and the bustling Shibuya district.',
  },
]

while True:
  user_input = input('Chat with history: ')
  response = chat(
    'llama3.2',
    messages=messages
    + [
      {'role': 'user', 'content': user_input},
    ],
  )

  # Add the response to the messages to maintain the history
  messages += [
    {'role': 'user', 'content': user_input},
    {'role': 'assistant', 'content': response.message.content},
  ]
  print(response.message.content + '\n')
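
# Illustrative note (an addition, not part of the original example): the messages list
# grows with every turn, so a long session can eventually exceed the model's context
# window. A simple, hedged mitigation is to send only the most recent turns, e.g.
# messages[-20:], before each chat() call; the cutoff is a hypothetical value to tune
# for the model and its num_ctx setting.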
================================================
File: /examples/chat.py
================================================
from ollama import chat

messages = [
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
]

response = chat('llama3.2', messages=messages)
print(response['message']['content'])
================================================
File: /examples/create.py
================================================
import sys

from ollama import create

args = sys.argv[1:]
if len(args) == 2:
  # create from local file
  path = args[1]
else:
  print('usage: python create.py <name> <filepath>')
  sys.exit(1)

# TODO: update to real Modelfile values
modelfile = f"""
FROM {path}
"""

example_modelfile = """
FROM llama3.2
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
PARAMETER num_ctx 4096
# sets a custom system message to specify the behavior of the chat assistant
SYSTEM You are Mario from super mario bros, acting as an assistant.
"""

for response in create(model=args[0], modelfile=modelfile, stream=True):
  print(response['status'])
================================================
File: /examples/embed.py
================================================
from ollama import embed

response = embed(model='llama3.2', input='Hello, world!')
print(response['embeddings'])
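
# Illustrative extension (an assumption, not part of the original example): `embeddings`
# is a list of float vectors, one per input, so two texts can be compared with cosine
# similarity using only built-in operations.
a = embed(model='llama3.2', input='Hello, world!')['embeddings'][0]
b = embed(model='llama3.2', input='Goodbye, world!')['embeddings'][0]

norm_a = sum(x * x for x in a) ** 0.5
norm_b = sum(y * y for y in b) ** 0.5
cosine = sum(x * y for x, y in zip(a, b)) / (norm_a * norm_b)
print(f'cosine similarity: {cosine:.4f}')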
================================================
File: /examples/fill-in-middle.py
================================================
from ollama import generate

prompt = '''def remove_non_ascii(s: str) -> str:
  """ '''

suffix = """
  return result
"""

response = generate(
  model='codellama:7b-code',
  prompt=prompt,
  suffix=suffix,
  options={
    'num_predict': 128,
    'temperature': 0,
    'top_p': 0.9,
    'stop': ['<EOT>'],
  },
)
print(response['response'])
================================================
File: /examples/generate-stream.py
================================================
from ollama import generate

for part in generate('llama3.2', 'Why is the sky blue?', stream=True):
  print(part['response'], end='', flush=True)
================================================
File: /examples/generate.py
================================================
from ollama import generate

response = generate('llama3.2', 'Why is the sky blue?')
print(response['response'])
================================================
File: /examples/list.py
================================================
from ollama import list
from ollama import ListResponse

response: ListResponse = list()

for model in response.models:
  print('Name:', model.model)
  print('  Size (MB):', f'{(model.size.real / 1024 / 1024):.2f}')
  if model.details:
    print('  Format:', model.details.format)
    print('  Family:', model.details.family)
    print('  Parameter Size:', model.details.parameter_size)
    print('  Quantization Level:', model.details.quantization_level)
  print('\n')
================================================
File: /examples/multimodal-chat.py
================================================
from ollama import chat

# from pathlib import Path

# Pass in the path to the image
path = input('Please enter the path to the image: ')

# You can also pass in base64 encoded image data
# img = base64.b64encode(Path(path).read_bytes()).decode()
# or the raw bytes
# img = Path(path).read_bytes()

response = chat(
  model='llama3.2-vision',
  messages=[
    {
      'role': 'user',
      'content': 'What is in this image? Be concise.',
      'images': [path],
    }
  ],
)

print(response.message.content)
================================================
File: /examples/multimodal-generate.py
================================================
import sys
import random

import httpx

from ollama import generate

latest = httpx.get('https://xkcd.com/info.0.json')
latest.raise_for_status()

if len(sys.argv) > 1:
  num = int(sys.argv[1])
else:
  num = random.randint(1, latest.json().get('num'))

comic = httpx.get(f'https://xkcd.com/{num}/info.0.json')
comic.raise_for_status()

print(f'xkcd #{comic.json().get("num")}: {comic.json().get("alt")}')
print(f'link: https://xkcd.com/{num}')
print('---')

raw = httpx.get(comic.json().get('img'))
raw.raise_for_status()

for response in generate('llava', 'explain this comic:', images=[raw.content], stream=True):
  print(response['response'], end='', flush=True)

print()
================================================
File: /examples/ps.py
================================================
from ollama import ps, pull, chat
from ollama import ProcessResponse

# Ensure at least one model is loaded
response = pull('llama3.2', stream=True)
progress_states = set()
for progress in response:
  if progress.get('status') in progress_states:
    continue
  progress_states.add(progress.get('status'))
  print(progress.get('status'))

print('\n')

print('Waiting for model to load... \n')
chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])

response: ProcessResponse = ps()
for model in response.models:
  print('Model: ', model.model)
  print('  Digest: ', model.digest)
  print('  Expires at: ', model.expires_at)
  print('  Size: ', model.size)
  print('  Size vram: ', model.size_vram)
  print('  Details: ', model.details)
  print('\n')
================================================
File: /examples/pull.py
================================================
from tqdm import tqdm

from ollama import pull

current_digest, bars = '', {}
for progress in pull('llama3.2', stream=True):
  digest = progress.get('digest', '')
  if digest != current_digest and current_digest in bars:
    bars[current_digest].close()

  if not digest:
    print(progress.get('status'))
    continue

  if digest not in bars and (total := progress.get('total')):
    bars[digest] = tqdm(total=total, desc=f'pulling {digest[7:19]}', unit='B', unit_scale=True)

  if completed := progress.get('completed'):
    bars[digest].update(completed - bars[digest].n)

  current_digest = digest
================================================
File: /examples/structured-outputs-image.py
================================================
from pathlib import Path
from pydantic import BaseModel
from typing import Literal

from ollama import chat


# Define the schema for image objects
class Object(BaseModel):
  name: str
  confidence: float
  attributes: str


class ImageDescription(BaseModel):
  summary: str
  objects: list[Object]
  scene: str
  colors: list[str]
  time_of_day: Literal['Morning', 'Afternoon', 'Evening', 'Night']
  setting: Literal['Indoor', 'Outdoor', 'Unknown']
  text_content: str | None = None


# Get path from user input
path = input('Enter the path to your image: ')
path = Path(path)

# Verify the file exists
if not path.exists():
  raise FileNotFoundError(f'Image not found at: {path}')

# Set up chat as usual
response = chat(
  model='llama3.2-vision',
  format=ImageDescription.model_json_schema(),  # Pass in the schema for the response
  messages=[
    {
      'role': 'user',
      'content': 'Analyze this image and return a detailed JSON description including objects, scene, colors and any text detected. If you cannot determine certain details, leave those fields empty.',
      'images': [path],
    },
  ],
  options={'temperature': 0},  # Set temperature to 0 for more deterministic output
)

# Convert received content to the schema
image_analysis = ImageDescription.model_validate_json(response.message.content)
print(image_analysis)
================================================
File: /examples/structured-outputs.py
================================================
from ollama import chat
from pydantic import BaseModel


# Define the schema for the response
class FriendInfo(BaseModel):
  name: str
  age: int
  is_available: bool


class FriendList(BaseModel):
  friends: list[FriendInfo]


# schema = {'type': 'object', 'properties': {'friends': {'type': 'array', 'items': {'type': 'object', 'properties': {'name': {'type': 'string'}, 'age': {'type': 'integer'}, 'is_available': {'type': 'boolean'}}, 'required': ['name', 'age', 'is_available']}}}, 'required': ['friends']}
response = chat(
  model='llama3.1:8b',
  messages=[{'role': 'user', 'content': 'I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format'}],
  format=FriendList.model_json_schema(),  # Use Pydantic to generate the schema or format=schema
  options={'temperature': 0},  # Make responses more deterministic
)

# Use Pydantic to validate the response
friends_response = FriendList.model_validate_json(response.message.content)
print(friends_response)
================================================
File: /examples/tools.py
================================================
from ollama import chat
from ollama import ChatResponse


def add_two_numbers(a: int, b: int) -> int:
  """
  Add two numbers

  Args:
    a (int): The first number
    b (int): The second number

  Returns:
    int: The sum of the two numbers
  """
  return a + b


def subtract_two_numbers(a: int, b: int) -> int:
  """
  Subtract two numbers
  """
  return a - b


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
  'type': 'function',
  'function': {
    'name': 'subtract_two_numbers',
    'description': 'Subtract two numbers',
    'parameters': {
      'type': 'object',
      'required': ['a', 'b'],
      'properties': {
        'a': {'type': 'integer', 'description': 'The first number'},
        'b': {'type': 'integer', 'description': 'The second number'},
      },
    },
  },
}

messages = [{'role': 'user', 'content': 'What is three plus one?'}]
print('Prompt:', messages[0]['content'])

available_functions = {
  'add_two_numbers': add_two_numbers,
  'subtract_two_numbers': subtract_two_numbers,
}

response: ChatResponse = chat(
  'llama3.1',
  messages=messages,
  tools=[add_two_numbers, subtract_two_numbers_tool],
)

if response.message.tool_calls:
  # There may be multiple tool calls in the response
  for tool in response.message.tool_calls:
    # Ensure the function is available, and then call it
    if function_to_call := available_functions.get(tool.function.name):
      print('Calling function:', tool.function.name)
      print('Arguments:', tool.function.arguments)
      output = function_to_call(**tool.function.arguments)
      print('Function output:', output)
    else:
      print('Function', tool.function.name, 'not found')

# Only needed to chat with the model using the tool call results
if response.message.tool_calls:
  # Add the function response to messages for the model to use
  messages.append(response.message)
  messages.append({'role': 'tool', 'content': str(output), 'name': tool.function.name})

  # Get final response from model with function outputs
  final_response = chat('llama3.1', messages=messages)
  print('Final response:', final_response.message.content)
else:
  print('No tool calls returned from model')