Ollama Python Examples
================================================
File: /examples/README.md
================================================
# Running Examples
Run the examples in this directory with:
```sh
# Run example
python3 examples/<example>.py
```
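The examples assume the `ollama` Python package is installed and a local Ollama server is running with the referenced models available (`llama3.2`, `llama3.1`, `llama3.2-vision`, `codellama:7b-code`, or `llava`, depending on the example). A minimal setup might be:
```sh
# Install the Python client and pull a model used by most examples
pip install ollama
ollama pull llama3.2
```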
### Chat - Chat with a model
- [chat.py](chat.py)
- [async-chat.py](async-chat.py)
- [chat-stream.py](chat-stream.py) - Streamed outputs
- [chat-with-history.py](chat-with-history.py) - Chat with a model while maintaining the conversation history
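A minimal call, distilled from `chat.py` below, looks roughly like this:
```python
from ollama import chat

# Single-turn request against a locally available model
response = chat('llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
print(response['message']['content'])
```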
### Generate - Generate text with a model
- [generate.py](generate.py)
- [async-generate.py](async-generate.py)
- [generate-stream.py](generate-stream.py) - Streamed outputs
- [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle
### Tools/Function Calling - Call a function with a model
- [tools.py](tools.py) - Simple example of Tools/Function Calling
- [async-tools.py](async-tools.py)
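As `tools.py` below shows in full, a plain Python function (with type hints and a docstring) can be passed directly in `tools`, alongside manually defined JSON schemas; a minimal sketch:
```python
from ollama import chat


def add_two_numbers(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b


# The function itself is passed as a tool; the model may respond with tool_calls
response = chat('llama3.1', messages=[{'role': 'user', 'content': 'What is three plus one?'}], tools=[add_two_numbers])
print(response.message.tool_calls)
```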
### Multimodal with Images - Chat with a multimodal (vision) model
- [multimodal-chat.py](multimodal-chat.py)
- [multimodal-generate.py](multimodal-generate.py)
### Structured Outputs - Generate structured outputs with a model
- [structured-outputs.py](structured-outputs.py)
- [async-structured-outputs.py](async-structured-outputs.py)
- [structured-outputs-image.py](structured-outputs-image.py)
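The core pattern, shown in full in `structured-outputs.py` below, is to pass a JSON schema (here generated by Pydantic) as `format` and then validate the reply against it; the `Pet` model is just an illustrative stand-in:
```python
from ollama import chat
from pydantic import BaseModel


class Pet(BaseModel):  # illustrative schema, not part of the example files
    name: str
    animal: str


response = chat(
    model='llama3.1:8b',
    messages=[{'role': 'user', 'content': 'I have a dog called Rex. Return the pet as JSON.'}],
    format=Pet.model_json_schema(),  # constrain the model output to this schema
    options={'temperature': 0},
)
print(Pet.model_validate_json(response.message.content))
```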
### Ollama List - List all downloaded models and their properties
- [list.py](list.py)
### Ollama ps - Show model status with CPU/GPU usage
- [ps.py](ps.py)
### Ollama Pull - Pull a model from the Ollama registry
Requirement: `pip install tqdm` (used for the download progress bars)
- [pull.py](pull.py)
### Ollama Create - Create a model from a Modelfile
```sh
python3 create.py <model> <filepath>
```
- [create.py](create.py)
See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.
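A minimal sketch of the call shape used in `create.py` below (the `mario` model name and Modelfile contents are illustrative):
```python
from ollama import create

# Illustrative Modelfile string; see the linked docs for all directives
modelfile = """
FROM llama3.2
SYSTEM You are Mario from super mario bros, acting as an assistant.
"""

# Stream status updates while Ollama builds the model
for response in create(model='mario', modelfile=modelfile, stream=True):
    print(response['status'])
```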
### Ollama Embed - Generate embeddings with a model
- [embed.py](embed.py)
================================================
File: /examples/async-chat.py
================================================
import asyncio

from ollama import AsyncClient


async def main():
    messages = [
        {
            'role': 'user',
            'content': 'Why is the sky blue?',
        },
    ]

    client = AsyncClient()
    response = await client.chat('llama3.2', messages=messages)
    print(response['message']['content'])


if __name__ == '__main__':
    asyncio.run(main())
================================================
File: /examples/async-generate.py
================================================
import asyncio

import ollama


async def main():
    client = ollama.AsyncClient()
    response = await client.generate('llama3.2', 'Why is the sky blue?')
    print(response['response'])


if __name__ == '__main__':
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print('\nGoodbye!')
================================================
File: /examples/async-structured-outputs.py
================================================
import asyncio

from pydantic import BaseModel

from ollama import AsyncClient


# Define the schema for the response
class FriendInfo(BaseModel):
    name: str
    age: int
    is_available: bool


class FriendList(BaseModel):
    friends: list[FriendInfo]


async def main():
    client = AsyncClient()
    response = await client.chat(
        model='llama3.1:8b',
        messages=[{'role': 'user', 'content': 'I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format'}],
        format=FriendList.model_json_schema(),  # Use Pydantic to generate the schema
        options={'temperature': 0},  # Make responses more deterministic
    )

    # Use Pydantic to validate the response
    friends_response = FriendList.model_validate_json(response.message.content)
    print(friends_response)


if __name__ == '__main__':
    asyncio.run(main())
================================================
File: /examples/async-tools.py
================================================
import asyncio

import ollama
from ollama import ChatResponse


def add_two_numbers(a: int, b: int) -> int:
    """
    Add two numbers

    Args:
        a (int): The first number
        b (int): The second number

    Returns:
        int: The sum of the two numbers
    """
    return a + b


def subtract_two_numbers(a: int, b: int) -> int:
    """
    Subtract two numbers
    """
    return a - b


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
    'type': 'function',
    'function': {
        'name': 'subtract_two_numbers',
        'description': 'Subtract two numbers',
        'parameters': {
            'type': 'object',
            'required': ['a', 'b'],
            'properties': {
                'a': {'type': 'integer', 'description': 'The first number'},
                'b': {'type': 'integer', 'description': 'The second number'},
            },
        },
    },
}

messages = [{'role': 'user', 'content': 'What is three plus one?'}]
print('Prompt:', messages[0]['content'])

available_functions = {
    'add_two_numbers': add_two_numbers,
    'subtract_two_numbers': subtract_two_numbers,
}


async def main():
    client = ollama.AsyncClient()
    response: ChatResponse = await client.chat(
        'llama3.1',
        messages=messages,
        tools=[add_two_numbers, subtract_two_numbers_tool],
    )

    if response.message.tool_calls:
        # Add the assistant message (including its tool calls) to the history
        messages.append(response.message)

        # There may be multiple tool calls in the response
        for tool in response.message.tool_calls:
            # Ensure the function is available, and then call it
            if function_to_call := available_functions.get(tool.function.name):
                print('Calling function:', tool.function.name)
                print('Arguments:', tool.function.arguments)
                output = function_to_call(**tool.function.arguments)
                print('Function output:', output)
                # Add each function result to messages for the model to use
                messages.append({'role': 'tool', 'content': str(output), 'name': tool.function.name})
            else:
                print('Function', tool.function.name, 'not found')

        # Get final response from model with function outputs
        final_response = await client.chat('llama3.1', messages=messages)
        print('Final response:', final_response.message.content)
    else:
        print('No tool calls returned from model')


if __name__ == '__main__':
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print('\nGoodbye!')
================================================
File: /examples/chat-stream.py
================================================
from ollama import chat

messages = [
    {
        'role': 'user',
        'content': 'Why is the sky blue?',
    },
]

for part in chat('llama3.2', messages=messages, stream=True):
    print(part['message']['content'], end='', flush=True)

print()
================================================
File: /examples/chat-with-history.py
================================================
from ollama import chat

messages = [
    {
        'role': 'user',
        'content': 'Why is the sky blue?',
    },
    {
        'role': 'assistant',
        'content': "The sky is blue because of the way the Earth's atmosphere scatters sunlight.",
    },
    {
        'role': 'user',
        'content': 'What is the weather in Tokyo?',
    },
    {
        'role': 'assistant',
        'content': 'The weather in Tokyo is typically warm and humid during the summer months, with temperatures often exceeding 30°C (86°F). The city experiences a rainy season from June to September, with heavy rainfall and occasional typhoons. Winter is mild, with temperatures rarely dropping below freezing. The city is known for its high-tech and vibrant culture, with many popular tourist attractions such as the Tokyo Tower, Senso-ji Temple, and the bustling Shibuya district.',
    },
]

while True:
    user_input = input('Chat with history: ')
    response = chat(
        'llama3.2',
        messages=messages
        + [
            {'role': 'user', 'content': user_input},
        ],
    )

    # Add the response to the messages to maintain the history
    messages += [
        {'role': 'user', 'content': user_input},
        {'role': 'assistant', 'content': response.message.content},
    ]
    print(response.message.content + '\n')
================================================
File: /examples/chat.py
================================================
from ollama import chat

messages = [
    {
        'role': 'user',
        'content': 'Why is the sky blue?',
    },
]

response = chat('llama3.2', messages=messages)
print(response['message']['content'])
================================================
File: /examples/create.py
================================================
import sys

from ollama import create

args = sys.argv[1:]

if len(args) == 2:
    # create from local file
    path = args[1]
else:
    print('usage: python create.py <name> <filepath>')
    sys.exit(1)

# TODO: update to real Modelfile values
modelfile = f"""
FROM {path}
"""

example_modelfile = """
FROM llama3.2
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
PARAMETER num_ctx 4096
# sets a custom system message to specify the behavior of the chat assistant
SYSTEM You are Mario from super mario bros, acting as an assistant.
"""

for response in create(model=args[0], modelfile=modelfile, stream=True):
    print(response['status'])
================================================
File: /examples/embed.py
================================================
from ollama import embed
response = embed(model='llama3.2', input='Hello, world!')
print(response['embeddings'])
================================================
File: /examples/fill-in-middle.py
================================================
from ollama import generate

prompt = '''def remove_non_ascii(s: str) -> str:
    """ '''

suffix = """
    return result
"""

response = generate(
    model='codellama:7b-code',
    prompt=prompt,
    suffix=suffix,
    options={
        'num_predict': 128,
        'temperature': 0,
        'top_p': 0.9,
        'stop': ['<EOT>'],
    },
)

print(response['response'])
================================================
File: /examples/generate-stream.py
================================================
from ollama import generate

for part in generate('llama3.2', 'Why is the sky blue?', stream=True):
    print(part['response'], end='', flush=True)
================================================
File: /examples/generate.py
================================================
from ollama import generate
response = generate('llama3.2', 'Why is the sky blue?')
print(response['response'])
================================================
File: /examples/list.py
================================================
from ollama import list
from ollama import ListResponse

response: ListResponse = list()

for model in response.models:
    print('Name:', model.model)
    print(' Size (MB):', f'{(model.size.real / 1024 / 1024):.2f}')
    if model.details:
        print(' Format:', model.details.format)
        print(' Family:', model.details.family)
        print(' Parameter Size:', model.details.parameter_size)
        print(' Quantization Level:', model.details.quantization_level)
    print('\n')
================================================
File: /examples/multimodal-chat.py
================================================
from ollama import chat
# from pathlib import Path

# Pass in the path to the image
path = input('Please enter the path to the image: ')

# You can also pass in base64 encoded image data
# img = base64.b64encode(Path(path).read_bytes()).decode()
# or the raw bytes
# img = Path(path).read_bytes()

response = chat(
    model='llama3.2-vision',
    messages=[
        {
            'role': 'user',
            'content': 'What is in this image? Be concise.',
            'images': [path],
        }
    ],
)

print(response.message.content)
================================================
File: /examples/multimodal-generate.py
================================================
import sys
import random

import httpx

from ollama import generate

latest = httpx.get('https://xkcd.com/info.0.json')
latest.raise_for_status()

if len(sys.argv) > 1:
    num = int(sys.argv[1])
else:
    num = random.randint(1, latest.json().get('num'))

comic = httpx.get(f'https://xkcd.com/{num}/info.0.json')
comic.raise_for_status()

print(f'xkcd #{comic.json().get("num")}: {comic.json().get("alt")}')
print(f'link: https://xkcd.com/{num}')
print('---')

raw = httpx.get(comic.json().get('img'))
raw.raise_for_status()

for response in generate('llava', 'explain this comic:', images=[raw.content], stream=True):
    print(response['response'], end='', flush=True)

print()
================================================
File: /examples/ps.py
================================================
from ollama import ps, pull, chat
from ollama import ProcessResponse

# Ensure at least one model is loaded
response = pull('llama3.2', stream=True)
progress_states = set()
for progress in response:
    if progress.get('status') in progress_states:
        continue
    progress_states.add(progress.get('status'))
    print(progress.get('status'))

print('\n')

print('Waiting for model to load... \n')
chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])

response: ProcessResponse = ps()
for model in response.models:
    print('Model: ', model.model)
    print(' Digest: ', model.digest)
    print(' Expires at: ', model.expires_at)
    print(' Size: ', model.size)
    print(' Size vram: ', model.size_vram)
    print(' Details: ', model.details)
    print('\n')
================================================
File: /examples/pull.py
================================================
from tqdm import tqdm

from ollama import pull

current_digest, bars = '', {}
for progress in pull('llama3.2', stream=True):
    digest = progress.get('digest', '')
    if digest != current_digest and current_digest in bars:
        bars[current_digest].close()

    if not digest:
        print(progress.get('status'))
        continue

    if digest not in bars and (total := progress.get('total')):
        bars[digest] = tqdm(total=total, desc=f'pulling {digest[7:19]}', unit='B', unit_scale=True)

    if completed := progress.get('completed'):
        bars[digest].update(completed - bars[digest].n)

    current_digest = digest
================================================
File: /examples/structured-outputs-image.py
================================================
from pathlib import Path
from typing import Literal

from pydantic import BaseModel

from ollama import chat


# Define the schema for image objects
class Object(BaseModel):
    name: str
    confidence: float
    attributes: str


class ImageDescription(BaseModel):
    summary: str
    objects: list[Object]
    scene: str
    colors: list[str]
    time_of_day: Literal['Morning', 'Afternoon', 'Evening', 'Night']
    setting: Literal['Indoor', 'Outdoor', 'Unknown']
    text_content: str | None = None


# Get path from user input
path = input('Enter the path to your image: ')
path = Path(path)

# Verify the file exists
if not path.exists():
    raise FileNotFoundError(f'Image not found at: {path}')

# Set up chat as usual
response = chat(
    model='llama3.2-vision',
    format=ImageDescription.model_json_schema(),  # Pass in the schema for the response
    messages=[
        {
            'role': 'user',
            'content': 'Analyze this image and return a detailed JSON description including objects, scene, colors and any text detected. If you cannot determine certain details, leave those fields empty.',
            'images': [path],
        },
    ],
    options={'temperature': 0},  # Set temperature to 0 for more deterministic output
)

# Convert received content to the schema
image_analysis = ImageDescription.model_validate_json(response.message.content)
print(image_analysis)
================================================
File: /examples/structured-outputs.py
================================================
from ollama import chat
from pydantic import BaseModel


# Define the schema for the response
class FriendInfo(BaseModel):
    name: str
    age: int
    is_available: bool


class FriendList(BaseModel):
    friends: list[FriendInfo]


# schema = {'type': 'object', 'properties': {'friends': {'type': 'array', 'items': {'type': 'object', 'properties': {'name': {'type': 'string'}, 'age': {'type': 'integer'}, 'is_available': {'type': 'boolean'}}, 'required': ['name', 'age', 'is_available']}}}, 'required': ['friends']}

response = chat(
    model='llama3.1:8b',
    messages=[{'role': 'user', 'content': 'I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format'}],
    format=FriendList.model_json_schema(),  # Use Pydantic to generate the schema or format=schema
    options={'temperature': 0},  # Make responses more deterministic
)

# Use Pydantic to validate the response
friends_response = FriendList.model_validate_json(response.message.content)
print(friends_response)
================================================
File: /examples/tools.py
================================================
from ollama import chat
from ollama import ChatResponse


def add_two_numbers(a: int, b: int) -> int:
    """
    Add two numbers

    Args:
        a (int): The first number
        b (int): The second number

    Returns:
        int: The sum of the two numbers
    """
    return a + b


def subtract_two_numbers(a: int, b: int) -> int:
    """
    Subtract two numbers
    """
    return a - b


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
    'type': 'function',
    'function': {
        'name': 'subtract_two_numbers',
        'description': 'Subtract two numbers',
        'parameters': {
            'type': 'object',
            'required': ['a', 'b'],
            'properties': {
                'a': {'type': 'integer', 'description': 'The first number'},
                'b': {'type': 'integer', 'description': 'The second number'},
            },
        },
    },
}

messages = [{'role': 'user', 'content': 'What is three plus one?'}]
print('Prompt:', messages[0]['content'])

available_functions = {
    'add_two_numbers': add_two_numbers,
    'subtract_two_numbers': subtract_two_numbers,
}

response: ChatResponse = chat(
    'llama3.1',
    messages=messages,
    tools=[add_two_numbers, subtract_two_numbers_tool],
)

if response.message.tool_calls:
    # Add the assistant message (including its tool calls) to the history
    messages.append(response.message)

    # There may be multiple tool calls in the response
    for tool in response.message.tool_calls:
        # Ensure the function is available, and then call it
        if function_to_call := available_functions.get(tool.function.name):
            print('Calling function:', tool.function.name)
            print('Arguments:', tool.function.arguments)
            output = function_to_call(**tool.function.arguments)
            print('Function output:', output)
            # Add each function result to messages for the model to use
            messages.append({'role': 'tool', 'content': str(output), 'name': tool.function.name})
        else:
            print('Function', tool.function.name, 'not found')

    # Get final response from model with function outputs
    final_response = chat('llama3.1', messages=messages)
    print('Final response:', final_response.message.content)
else:
    print('No tool calls returned from model')