Ollama Python Examples
================================================
File: /examples/README.md
================================================
# Running Examples
Run the examples in this directory with:
```sh
# Run example
python3 examples/<example>.py
```
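The examples assume the `ollama` Python package is installed and a local Ollama server is running with the referenced models available (`llama3.2`, `llama3.1`, `llama3.2-vision`, `codellama:7b-code`, or `llava`, depending on the example). A minimal setup might be:
```sh
# Install the Python client and pull a model used by most examples
pip install ollama
ollama pull llama3.2
```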
### Chat - Chat with a model
- [chat.py](chat.py)
- [async-chat.py](async-chat.py)
- [chat-stream.py](chat-stream.py) - Streamed outputs
- [chat-with-history.py](chat-with-history.py) - Chat with a model while maintaining the conversation history
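A minimal call, distilled from `chat.py` below, looks roughly like this:
```python
from ollama import chat

# Single-turn request against a locally available model
response = chat('llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
print(response['message']['content'])
```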
### Generate - Generate text with a model
- [generate.py](generate.py)
- [async-generate.py](async-generate.py)
- [generate-stream.py](generate-stream.py) - Streamed outputs
- [fill-in-middle.py](fill-in-middle.py) - Given a prefix and suffix, fill in the middle
### Tools/Function Calling - Call a function with a model
- [tools.py](tools.py) - Simple example of Tools/Function Calling
- [async-tools.py](async-tools.py)
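As `tools.py` below shows in full, a plain Python function (with type hints and a docstring) can be passed directly in `tools`, alongside manually defined JSON schemas; a minimal sketch:
```python
from ollama import chat


def add_two_numbers(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b


# The function itself is passed as a tool; the model may respond with tool_calls
response = chat('llama3.1', messages=[{'role': 'user', 'content': 'What is three plus one?'}], tools=[add_two_numbers])
print(response.message.tool_calls)
```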
### Multimodal with Images - Chat with a multimodal (vision) model
- [multimodal-chat.py](multimodal-chat.py)
- [multimodal-generate.py](multimodal-generate.py)
### Structured Outputs - Generate structured outputs with a model
- [structured-outputs.py](structured-outputs.py)
- [async-structured-outputs.py](async-structured-outputs.py)
- [structured-outputs-image.py](structured-outputs-image.py)
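The core pattern, shown in full in `structured-outputs.py` below, is to pass a JSON schema (here generated by Pydantic) as `format` and then validate the reply against it; the `Pet` model is just an illustrative stand-in:
```python
from ollama import chat
from pydantic import BaseModel


class Pet(BaseModel):  # illustrative schema, not part of the example files
    name: str
    animal: str


response = chat(
    model='llama3.1:8b',
    messages=[{'role': 'user', 'content': 'I have a dog called Rex. Return the pet as JSON.'}],
    format=Pet.model_json_schema(),  # constrain the model output to this schema
    options={'temperature': 0},
)
print(Pet.model_validate_json(response.message.content))
```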
### Ollama List - List all downloaded models and their properties
- [list.py](list.py)
### Ollama ps - Show model status with CPU/GPU usage
- [ps.py](ps.py)
### Ollama Pull - Pull a model from the Ollama registry
Requirement: `pip install tqdm` (used for the download progress bars)
- [pull.py](pull.py)
### Ollama Create - Create a model from a Modelfile
```sh
python3 create.py <model> <filepath>
```
- [create.py](create.py)
See [ollama/docs/modelfile.md](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) for more information on the Modelfile format.
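A minimal sketch of the call shape used in `create.py` below (the `mario` model name and Modelfile contents are illustrative):
```python
from ollama import create

# Illustrative Modelfile string; see the linked docs for all directives
modelfile = """
FROM llama3.2
SYSTEM You are Mario from super mario bros, acting as an assistant.
"""

# Stream status updates while Ollama builds the model
for response in create(model='mario', modelfile=modelfile, stream=True):
    print(response['status'])
```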
### Ollama Embed - Generate embeddings with a model
- [embed.py](embed.py)
================================================
File: /examples/async-chat.py
================================================
import asyncio

from ollama import AsyncClient


async def main():
    messages = [
        {
            'role': 'user',
            'content': 'Why is the sky blue?',
        },
    ]

    client = AsyncClient()
    response = await client.chat('llama3.2', messages=messages)
    print(response['message']['content'])


if __name__ == '__main__':
    asyncio.run(main())
================================================
File: /examples/async-generate.py
================================================
import asyncio

import ollama


async def main():
    client = ollama.AsyncClient()
    response = await client.generate('llama3.2', 'Why is the sky blue?')
    print(response['response'])


if __name__ == '__main__':
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print('\nGoodbye!')
================================================
File: /examples/async-structured-outputs.py
================================================
import asyncio

from pydantic import BaseModel

from ollama import AsyncClient


# Define the schema for the response
class FriendInfo(BaseModel):
    name: str
    age: int
    is_available: bool


class FriendList(BaseModel):
    friends: list[FriendInfo]


async def main():
    client = AsyncClient()
    response = await client.chat(
        model='llama3.1:8b',
        messages=[{'role': 'user', 'content': 'I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format'}],
        format=FriendList.model_json_schema(),  # Use Pydantic to generate the schema
        options={'temperature': 0},  # Make responses more deterministic
    )

    # Use Pydantic to validate the response
    friends_response = FriendList.model_validate_json(response.message.content)
    print(friends_response)


if __name__ == '__main__':
    asyncio.run(main())
================================================
File: /examples/async-tools.py
================================================
import asyncio

import ollama
from ollama import ChatResponse


def add_two_numbers(a: int, b: int) -> int:
    """
    Add two numbers

    Args:
        a (int): The first number
        b (int): The second number

    Returns:
        int: The sum of the two numbers
    """
    return a + b


def subtract_two_numbers(a: int, b: int) -> int:
    """
    Subtract two numbers
    """
    return a - b


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
    'type': 'function',
    'function': {
        'name': 'subtract_two_numbers',
        'description': 'Subtract two numbers',
        'parameters': {
            'type': 'object',
            'required': ['a', 'b'],
            'properties': {
                'a': {'type': 'integer', 'description': 'The first number'},
                'b': {'type': 'integer', 'description': 'The second number'},
            },
        },
    },
}

messages = [{'role': 'user', 'content': 'What is three plus one?'}]
print('Prompt:', messages[0]['content'])

available_functions = {
    'add_two_numbers': add_two_numbers,
    'subtract_two_numbers': subtract_two_numbers,
}


async def main():
    client = ollama.AsyncClient()
    response: ChatResponse = await client.chat(
        'llama3.1',
        messages=messages,
        tools=[add_two_numbers, subtract_two_numbers_tool],
    )

    if response.message.tool_calls:
        # Add the assistant message (including its tool calls) to the history
        messages.append(response.message)

        # There may be multiple tool calls in the response
        for tool in response.message.tool_calls:
            # Ensure the function is available, and then call it
            if function_to_call := available_functions.get(tool.function.name):
                print('Calling function:', tool.function.name)
                print('Arguments:', tool.function.arguments)
                output = function_to_call(**tool.function.arguments)
                print('Function output:', output)
                # Add each function result to messages for the model to use
                messages.append({'role': 'tool', 'content': str(output), 'name': tool.function.name})
            else:
                print('Function', tool.function.name, 'not found')

        # Get final response from model with function outputs
        final_response = await client.chat('llama3.1', messages=messages)
        print('Final response:', final_response.message.content)
    else:
        print('No tool calls returned from model')


if __name__ == '__main__':
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print('\nGoodbye!')
================================================
File: /examples/chat-stream.py
================================================
from ollama import chat

messages = [
    {
        'role': 'user',
        'content': 'Why is the sky blue?',
    },
]

for part in chat('llama3.2', messages=messages, stream=True):
    print(part['message']['content'], end='', flush=True)

print()
================================================
File: /examples/chat-with-history.py
================================================
from ollama import chat

messages = [
    {
        'role': 'user',
        'content': 'Why is the sky blue?',
    },
    {
        'role': 'assistant',
        'content': "The sky is blue because of the way the Earth's atmosphere scatters sunlight.",
    },
    {
        'role': 'user',
        'content': 'What is the weather in Tokyo?',
    },
    {
        'role': 'assistant',
        'content': 'The weather in Tokyo is typically warm and humid during the summer months, with temperatures often exceeding 30°C (86°F). The city experiences a rainy season from June to September, with heavy rainfall and occasional typhoons. Winter is mild, with temperatures rarely dropping below freezing. The city is known for its high-tech and vibrant culture, with many popular tourist attractions such as the Tokyo Tower, Senso-ji Temple, and the bustling Shibuya district.',
    },
]

while True:
    user_input = input('Chat with history: ')
    response = chat(
        'llama3.2',
        messages=messages
        + [
            {'role': 'user', 'content': user_input},
        ],
    )

    # Add the response to the messages to maintain the history
    messages += [
        {'role': 'user', 'content': user_input},
        {'role': 'assistant', 'content': response.message.content},
    ]
    print(response.message.content + '\n')
================================================
File: /examples/chat.py
================================================
from ollama import chat

messages = [
    {
        'role': 'user',
        'content': 'Why is the sky blue?',
    },
]

response = chat('llama3.2', messages=messages)
print(response['message']['content'])
================================================
File: /examples/create.py
================================================
import sys

from ollama import create

args = sys.argv[1:]

if len(args) == 2:
    # create from local file
    path = args[1]
else:
    print('usage: python create.py <name> <filepath>')
    sys.exit(1)

# TODO: update to real Modelfile values
modelfile = f"""
FROM {path}
"""

example_modelfile = """
FROM llama3.2
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
PARAMETER num_ctx 4096
# sets a custom system message to specify the behavior of the chat assistant
SYSTEM You are Mario from super mario bros, acting as an assistant.
"""

for response in create(model=args[0], modelfile=modelfile, stream=True):
    print(response['status'])
================================================
File: /examples/embed.py
================================================
from ollama import embed
response = embed(model='llama3.2', input='Hello, world!')
print(response['embeddings'])
================================================
File: /examples/fill-in-middle.py
================================================
from ollama import generate

prompt = '''def remove_non_ascii(s: str) -> str:
    """ '''

suffix = """
    return result
"""

response = generate(
    model='codellama:7b-code',
    prompt=prompt,
    suffix=suffix,
    options={
        'num_predict': 128,
        'temperature': 0,
        'top_p': 0.9,
        'stop': ['<EOT>'],
    },
)

print(response['response'])
================================================
File: /examples/generate-stream.py
================================================
from ollama import generate

for part in generate('llama3.2', 'Why is the sky blue?', stream=True):
    print(part['response'], end='', flush=True)
================================================
File: /examples/generate.py
================================================
from ollama import generate
response = generate('llama3.2', 'Why is the sky blue?')
print(response['response'])
================================================
File: /examples/list.py
================================================
from ollama import list
from ollama import ListResponse

response: ListResponse = list()

for model in response.models:
    print('Name:', model.model)
    print(' Size (MB):', f'{(model.size.real / 1024 / 1024):.2f}')
    if model.details:
        print(' Format:', model.details.format)
        print(' Family:', model.details.family)
        print(' Parameter Size:', model.details.parameter_size)
        print(' Quantization Level:', model.details.quantization_level)
    print('\n')
================================================
File: /examples/multimodal-chat.py
================================================
from ollama import chat
# from pathlib import Path

# Pass in the path to the image
path = input('Please enter the path to the image: ')

# You can also pass in base64 encoded image data
# img = base64.b64encode(Path(path).read_bytes()).decode()
# or the raw bytes
# img = Path(path).read_bytes()

response = chat(
    model='llama3.2-vision',
    messages=[
        {
            'role': 'user',
            'content': 'What is in this image? Be concise.',
            'images': [path],
        }
    ],
)

print(response.message.content)
================================================
File: /examples/multimodal-generate.py
================================================
import sys
import random

import httpx

from ollama import generate

latest = httpx.get('https://xkcd.com/info.0.json')
latest.raise_for_status()

if len(sys.argv) > 1:
    num = int(sys.argv[1])
else:
    num = random.randint(1, latest.json().get('num'))

comic = httpx.get(f'https://xkcd.com/{num}/info.0.json')
comic.raise_for_status()

print(f'xkcd #{comic.json().get("num")}: {comic.json().get("alt")}')
print(f'link: https://xkcd.com/{num}')
print('---')

raw = httpx.get(comic.json().get('img'))
raw.raise_for_status()

for response in generate('llava', 'explain this comic:', images=[raw.content], stream=True):
    print(response['response'], end='', flush=True)

print()
================================================
File: /examples/ps.py
================================================
from ollama import ps, pull, chat
from ollama import ProcessResponse

# Ensure at least one model is loaded
response = pull('llama3.2', stream=True)
progress_states = set()
for progress in response:
    if progress.get('status') in progress_states:
        continue
    progress_states.add(progress.get('status'))
    print(progress.get('status'))

print('\n')

print('Waiting for model to load... \n')
chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])

response: ProcessResponse = ps()
for model in response.models:
    print('Model: ', model.model)
    print(' Digest: ', model.digest)
    print(' Expires at: ', model.expires_at)
    print(' Size: ', model.size)
    print(' Size vram: ', model.size_vram)
    print(' Details: ', model.details)
    print('\n')
================================================
File: /examples/pull.py
================================================
from tqdm import tqdm

from ollama import pull

current_digest, bars = '', {}
for progress in pull('llama3.2', stream=True):
    digest = progress.get('digest', '')
    if digest != current_digest and current_digest in bars:
        bars[current_digest].close()

    if not digest:
        print(progress.get('status'))
        continue

    if digest not in bars and (total := progress.get('total')):
        bars[digest] = tqdm(total=total, desc=f'pulling {digest[7:19]}', unit='B', unit_scale=True)

    if completed := progress.get('completed'):
        bars[digest].update(completed - bars[digest].n)

    current_digest = digest
================================================
File: /examples/structured-outputs-image.py
================================================
from pathlib import Path
from typing import Literal

from pydantic import BaseModel

from ollama import chat


# Define the schema for image objects
class Object(BaseModel):
    name: str
    confidence: float
    attributes: str


class ImageDescription(BaseModel):
    summary: str
    objects: list[Object]
    scene: str
    colors: list[str]
    time_of_day: Literal['Morning', 'Afternoon', 'Evening', 'Night']
    setting: Literal['Indoor', 'Outdoor', 'Unknown']
    text_content: str | None = None


# Get path from user input
path = input('Enter the path to your image: ')
path = Path(path)

# Verify the file exists
if not path.exists():
    raise FileNotFoundError(f'Image not found at: {path}')

# Set up chat as usual
response = chat(
    model='llama3.2-vision',
    format=ImageDescription.model_json_schema(),  # Pass in the schema for the response
    messages=[
        {
            'role': 'user',
            'content': 'Analyze this image and return a detailed JSON description including objects, scene, colors and any text detected. If you cannot determine certain details, leave those fields empty.',
            'images': [path],
        },
    ],
    options={'temperature': 0},  # Set temperature to 0 for more deterministic output
)

# Convert received content to the schema
image_analysis = ImageDescription.model_validate_json(response.message.content)
print(image_analysis)
================================================
File: /examples/structured-outputs.py
================================================
from ollama import chat
from pydantic import BaseModel


# Define the schema for the response
class FriendInfo(BaseModel):
    name: str
    age: int
    is_available: bool


class FriendList(BaseModel):
    friends: list[FriendInfo]


# schema = {'type': 'object', 'properties': {'friends': {'type': 'array', 'items': {'type': 'object', 'properties': {'name': {'type': 'string'}, 'age': {'type': 'integer'}, 'is_available': {'type': 'boolean'}}, 'required': ['name', 'age', 'is_available']}}}, 'required': ['friends']}

response = chat(
    model='llama3.1:8b',
    messages=[{'role': 'user', 'content': 'I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format'}],
    format=FriendList.model_json_schema(),  # Use Pydantic to generate the schema or format=schema
    options={'temperature': 0},  # Make responses more deterministic
)

# Use Pydantic to validate the response
friends_response = FriendList.model_validate_json(response.message.content)
print(friends_response)
================================================
File: /examples/tools.py
================================================
from ollama import chat
from ollama import ChatResponse


def add_two_numbers(a: int, b: int) -> int:
    """
    Add two numbers

    Args:
        a (int): The first number
        b (int): The second number

    Returns:
        int: The sum of the two numbers
    """
    return a + b


def subtract_two_numbers(a: int, b: int) -> int:
    """
    Subtract two numbers
    """
    return a - b


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
    'type': 'function',
    'function': {
        'name': 'subtract_two_numbers',
        'description': 'Subtract two numbers',
        'parameters': {
            'type': 'object',
            'required': ['a', 'b'],
            'properties': {
                'a': {'type': 'integer', 'description': 'The first number'},
                'b': {'type': 'integer', 'description': 'The second number'},
            },
        },
    },
}

messages = [{'role': 'user', 'content': 'What is three plus one?'}]
print('Prompt:', messages[0]['content'])

available_functions = {
    'add_two_numbers': add_two_numbers,
    'subtract_two_numbers': subtract_two_numbers,
}

response: ChatResponse = chat(
    'llama3.1',
    messages=messages,
    tools=[add_two_numbers, subtract_two_numbers_tool],
)

if response.message.tool_calls:
    # Add the assistant message (including its tool calls) to the history
    messages.append(response.message)

    # There may be multiple tool calls in the response
    for tool in response.message.tool_calls:
        # Ensure the function is available, and then call it
        if function_to_call := available_functions.get(tool.function.name):
            print('Calling function:', tool.function.name)
            print('Arguments:', tool.function.arguments)
            output = function_to_call(**tool.function.arguments)
            print('Function output:', output)
            # Add each function result to messages for the model to use
            messages.append({'role': 'tool', 'content': str(output), 'name': tool.function.name})
        else:
            print('Function', tool.function.name, 'not found')

    # Get final response from model with function outputs
    final_response = chat('llama3.1', messages=messages)
    print('Final response:', final_response.message.content)
else:
    print('No tool calls returned from model')