Skip to content

Instantly share code, notes, and snippets.

@Phate334
Last active May 16, 2025 09:22
Show Gist options
  • Save Phate334/dd633561879f41a8c4affc4031df1c7f to your computer and use it in GitHub Desktop.
Save Phate334/dd633561879f41a8c4affc4031df1c7f to your computer and use it in GitHub Desktop.
Mistral 3.1 chat template for llama.cpp, including tools and support for multiple images. Modified from the vLLM template: https://github.com/vllm-project/vllm/blob/main/examples/tool_chat_template_mistral.jinja
# Docker Compose definition for a single llama.cpp server container.
# Nesting restored: without indentation every key below would parse as a
# sibling top-level key with a null value instead of a service definition.
services:
  llm:
    # CUDA build of the llama.cpp server, pinned to a specific build tag.
    image: ghcr.io/ggml-org/llama.cpp:server-cuda-b5391
    ports:
      # HOST:CONTAINER - quoted so the mapping is always read as a string.
      - "7071:8080"
    volumes:
      # Host ./models is mounted at /models; the /models/... paths in the
      # env file refer to files placed in this directory.
      - ./models:/models
    env_file:
      # Server options are supplied as LLAMA_ARG_* environment variables.
      - llm.env
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for the container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
# llm.env - environment for the llama.cpp server container (referenced by the
# compose file's env_file entry). Each LLAMA_ARG_* variable is expected to map
# to the llama-server command-line option of the same name; confirm against
# the llama.cpp server README for the pinned build.
# Listen on all interfaces so the published container port is reachable.
LLAMA_ARG_HOST=0.0.0.0
# Model weights; /models is the container side of the ./models volume mount.
LLAMA_ARG_MODEL=/models/unsloth_Mistral-Small-3.1-24B-Instruct-2503-Q4_K_M.gguf
# Model name advertised to API clients.
LLAMA_ARG_ALIAS=mistral-small3.1:24b-instruct-2503-q4_K_M
# Multimodal projector file used for image input.
LLAMA_ARG_MMPROJ=/models/mmproj-unsloth_Mistral-Small-3.1-24B-Instruct-2503-f16.gguf
# NOTE(review): 0 presumably leaves projector GPU offload enabled - confirm
# how the pinned build interprets a "no-*" flag set to 0.
LLAMA_ARG_NO_MMPROJ_OFFLOAD=0
# Number of layers to offload to the GPU (99 is presumably "all" for this
# model - verify against the model's layer count).
LLAMA_ARG_N_GPU_LAYERS=99
LLAMA_ARG_FLASH_ATTN=1
# Context window in tokens.
LLAMA_ARG_CTX_SIZE=8192
# Number of parallel request slots.
LLAMA_ARG_N_PARALLEL=1
LLAMA_ARG_CACHE_REUSE=256
# KV cache element types for keys and values.
LLAMA_ARG_CACHE_TYPE_K=q8_0
LLAMA_ARG_CACHE_TYPE_V=q8_0
# Enable the Jinja template engine and load the custom chat template; the
# template file must be present in the mounted ./models directory.
LLAMA_ARG_JINJA=1
LLAMA_ARG_CHAT_TEMPLATE_FILE=/models/mistral.jinja
# Enable the metrics endpoint.
LLAMA_ARG_ENDPOINT_METRICS=1
{%- macro render_message_content(content) -%}
    {#- Render one message body. A plain string is emitted unchanged. A list of
        typed blocks is concatenated with no separator: text blocks contribute
        their text, image and image_url blocks contribute the literal [IMG]
        placeholder, and any other block type raises. All tags trim on both
        sides, so the indentation here never reaches the output. -#}
    {%- if content is not string -%}
        {%- for part in content -%}
            {%- set part_type = part["type"] -%}
            {%- if part_type == "image" or part_type == "image_url" -%}
                {{- "[IMG]" -}}
            {%- elif part_type == "text" -%}
                {{- part["text"] -}}
            {%- else -%}
                {{- raise_exception("Only text and image blocks are supported in message content!") -}}
            {%- endif -%}
        {%- endfor -%}
    {%- else -%}
        {{- content -}}
    {%- endif -%}
{%- endmacro -%}
{%- macro validate_tool_call_id(id, context="") -%}
    {#- Return the last nine characters of a tool call id. Raises when the id
        is missing or shorter than nine characters; context is appended to the
        error text so the caller site can be identified. Only presence and
        length are checked here; the character set is not inspected. -#}
    {%- if id is defined and id|length >= 9 -%}
        {{- id[-9:] -}}
    {%- else -%}
        {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (" + context + ")") -}}
    {%- endif -%}
{%- endmacro -%}
{#- Conversation setup.
    system_message: set only when the first message has the system role.
    loop_messages:  the conversation without that leading system message.
    tools:          normalised to none when the caller did not pass any.
    user_messages:  every user-role message, in order.
    Every tag below trims to its left, so indentation is not emitted. #}
{%- if messages[0]["role"] == "system" %}
    {#- Render through the shared macro so a list-form system message keeps all
        of its blocks; reading only content[0]["text"] silently dropped every
        block after the first. String content is passed through unchanged. #}
    {%- set system_message = render_message_content(messages[0]["content"]) %}
    {%- set loop_messages = messages[1:] %}
{%- else %}
    {%- set loop_messages = messages %}
{%- endif %}
{%- if tools is not defined %}
    {%- set tools = none %}
{%- endif %}
{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
{#- Role-alternation check. Messages with the tool or tool_results role, and
    messages carrying a tool_calls key, are ignored; the remaining messages
    must alternate user, assistant, user, ... starting with user.
    A namespace counter is used instead of building a list with the do tag:
    do is only available when the optional expression-statement extension is
    loaded, so relying on it makes the template fail on engines without it. #}
{%- set turn = namespace(index=0) %}
{%- for message in loop_messages %}
    {%- if message["role"] != "tool" and message["role"] != "tool_results" and ("tool_calls" not in message) %}
        {%- if (message["role"] == "user") != (turn.index % 2 == 0) %}
            {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
        {%- endif %}
        {#- Assignments inside a for loop are scoped to the iteration, so the
            running position lives on the namespace object. #}
        {%- set turn.index = turn.index + 1 %}
    {%- endif %}
{%- endfor %}
{#- Rendering pass: emits bos_token, then every message in loop_messages.
      user               -> [INST] content[/INST]; the last user message is
                            preceded by the [AVAILABLE_TOOLS] block when tools
                            were supplied, and carries the system message.
      tool calls         -> [TOOL_CALLS] [ ... ] followed by eos_token.
      assistant          -> a space, the content, then eos_token.
      tool, tool_results -> [TOOL_RESULTS] { ... }[/TOOL_RESULTS].
    Any other role raises. Every tag trims to its left, so the indentation in
    this section is never emitted. #}
{{- bos_token }}
{#- Locate the last user message by position. Comparing message values would
    also match an earlier user turn with identical content. #}
{%- set last_user = namespace(index=-1) %}
{%- for message in loop_messages %}
    {%- if message["role"] == "user" %}
        {%- set last_user.index = loop.index0 %}
    {%- endif %}
{%- endfor %}
{%- for message in loop_messages %}
    {%- if message["role"] == "user" %}
        {%- set is_last_user = (loop.index0 == last_user.index) %}
        {#- An empty tools list is skipped rather than rendered as an empty block. #}
        {%- if is_last_user and tools is not none and tools|length > 0 %}
            {{- "[AVAILABLE_TOOLS] [" }}
            {%- for tool in tools %}
                {%- set tool = tool.function %}
                {{- '{"type": "function", "function": {' }}
                {%- for key, val in tool.items() if key != "return" %}
                    {#- tojson for every value: string fields such as a
                        description are then quoted and escaped, so embedded
                        quotes or newlines cannot break the JSON. #}
                    {{- '"' + key + '": ' + val|tojson }}
                    {%- if not loop.last %}
                        {{- ", " }}
                    {%- endif %}
                {%- endfor %}
                {{- "}}" }}
                {%- if not loop.last %}
                    {{- ", " }}
                {%- endif %}
            {%- endfor %}
            {{- "]" }}
            {{- "[/AVAILABLE_TOOLS]" }}
        {%- endif %}
        {{- "[INST] " }}
        {#- The system message rides on the last user message. Gating this on
            loop.last dropped the system prompt whenever the conversation ended
            with a tool result or an assistant turn. #}
        {%- if is_last_user and system_message is defined %}
            {{- system_message + "\n\n" }}
        {%- endif %}
        {{- render_message_content(message["content"]) }}
        {{- "[/INST]" }}
    {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %}
        {%- if message.tool_calls is defined %}
            {%- set tool_calls = message.tool_calls %}
        {%- else %}
            {%- set tool_calls = message.content %}
        {%- endif %}
        {{- "[TOOL_CALLS] [" }}
        {%- for tool_call in tool_calls %}
            {#- Drop the closing brace of the serialised function object so the
                id field can be appended inside the same JSON object. #}
            {%- set out = tool_call.function|tojson %}
            {{- out[:-1] }}
            {{- ', "id": "' + validate_tool_call_id(tool_call.id, "1") + '"}' }}
            {%- if not loop.last %}
                {{- ", " }}
            {%- endif %}
        {%- endfor %}
        {#- Closed after the loop so the bracket and eos_token are emitted even
            when the list is empty. #}
        {{- "]" + eos_token }}
    {%- elif message["role"] == "assistant" %}
        {{- " " + render_message_content(message["content"]) + eos_token }}
    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
        {%- if message.content is defined and message.content.content is defined %}
            {%- set content = message.content.content %}
        {%- else %}
            {%- set content = message.content %}
        {%- endif %}
        {#- The content is inserted as-is, without JSON quoting. #}
        {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
        {{- '"call_id": "' + validate_tool_call_id(message.tool_call_id, "2") + '"}[/TOOL_RESULTS]' }}
    {%- else %}
        {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
    {%- endif %}
{%- endfor %}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment