yrro · May 29, 2026 13:40 · JoeEnderman · May 4, 2026
diff --git a/filter.py b/filter.py
 """
 title: Strip Thinking Tags
 author: Sam Morris <sam@robots.org.uk>
 author_url: https://gist.github.com/yrro/b0f2765ea55ae3414e06b319dd07ae8e
 version: 0.1
 """

 # Open WebUI re-injects thinking into the assistant messages in the conversation
 # history, which is contrary to Gemma 4's requirements.
 # This filter is a workaround for <https://github.com/open-webui/open-webui/issues/23339>.
 #
 # There is a setting Admin Panel → Settings → Connection → [your llama.cpp connection] → Provider;
 # Set this to "llama.cpp" and thinking is sent in a separate 'reasoning_content' field instead
 # of being rendered into the message stream. Maybe that makes this filter obsolete?

 from pydantic import BaseModel, Field
 from typing import Optional


 class Filter:
     """Filter that drops a leading ``<think>...</think>`` block from assistant messages.

     Only messages whose role is ``"assistant"`` and whose content is a string
     beginning with ``<think>`` are rewritten; every other message is passed
     through as a shallow copy.
     """

     def inlet(self, body: dict, __user__: Optional[dict] = None) -> dict:
         """Return a copy of *body* with thinking stripped from its messages.

         Args:
             body: Request payload; ``body["messages"]`` must be an iterable of
                 message dicts.
             __user__: Not used by this filter.

         Returns:
             A new dict equal to *body* except that ``"messages"`` is replaced
             by the filtered list. *body* and its message dicts are not
             modified.

         Raises:
             KeyError: If *body* has no ``"messages"`` entry.
         """
         new_messages = [self.strip_thinking(message) for message in body["messages"]]
         return body | {"messages": new_messages}

     def strip_thinking(self, message: dict):
         """Return a copy of *message* without its leading thinking block.

         Args:
             message: A single chat message dict.

         Returns:
             A new dict. For an assistant message whose string content starts
             with ``<think>``, ``"content"`` is replaced by the text after the
             first ``</think>`` with leading whitespace removed. If the closing
             tag is missing, the whole content counts as thinking and becomes
             an empty string. Any other message is copied unchanged.
         """
         content = message.get("content")
         # Content may be missing, None, or a non-string value (e.g. a list of
         # parts); such messages cannot start with a thinking tag, so they are
         # passed through instead of raising.
         if (
             message.get("role") == "assistant"
             and isinstance(content, str)
             and content.startswith("<think>")
         ):
             _thinking, _sep, remainder = content.partition("</think>")
             return message | {"content": remainder.lstrip()}
         return dict(message)
	"""
	title: Strip Thinking Tags
	author: Sam Morris <sam@robots.org.uk>
	author_url: https://gist.github.com/yrro/b0f2765ea55ae3414e06b319dd07ae8e
	version: 0.1
	"""

	# Open WebUI re-injects thinking into the assistant messages in the conversation
	# history, which is contrary to Gemma 4's requirements.
	# This filter is a workaround for <https://github.com/open-webui/open-webui/issues/23339>.
	#
	# There is a setting Admin Panel → Settings → Connection → [your llama.cpp connection] → Provider;
	# Set this to "llama.cpp" and thinking is sent in a separate 'reasoning_content' field instead
	# of being rendered into the message stream. Maybe that makes this filter obsolete?

	from pydantic import BaseModel, Field
	from typing import Optional


	class Filter:
	    """Filter that drops a leading ``<think>...</think>`` block from assistant messages.

	    Only messages whose role is ``"assistant"`` and whose content is a string
	    beginning with ``<think>`` are rewritten; every other message is passed
	    through as a shallow copy.
	    """

	    def inlet(self, body: dict, __user__: Optional[dict] = None) -> dict:
	        """Return a copy of *body* with thinking stripped from its messages.

	        Args:
	            body: Request payload; ``body["messages"]`` must be an iterable of
	                message dicts.
	            __user__: Not used by this filter.

	        Returns:
	            A new dict equal to *body* except that ``"messages"`` is replaced
	            by the filtered list. *body* and its message dicts are not
	            modified.

	        Raises:
	            KeyError: If *body* has no ``"messages"`` entry.
	        """
	        new_messages = [self.strip_thinking(message) for message in body["messages"]]
	        return body | {"messages": new_messages}

	    def strip_thinking(self, message: dict):
	        """Return a copy of *message* without its leading thinking block.

	        Args:
	            message: A single chat message dict.

	        Returns:
	            A new dict. For an assistant message whose string content starts
	            with ``<think>``, ``"content"`` is replaced by the text after the
	            first ``</think>`` with leading whitespace removed. If the closing
	            tag is missing, the whole content counts as thinking and becomes
	            an empty string. Any other message is copied unchanged.
	        """
	        content = message.get("content")
	        # Content may be missing, None, or a non-string value (e.g. a list of
	        # parts); such messages cannot start with a thinking tag, so they are
	        # passed through instead of raising.
	        if (
	            message.get("role") == "assistant"
	            and isinstance(content, str)
	            and content.startswith("<think>")
	        ):
	            _thinking, _sep, remainder = content.partition("</think>")
	            return message | {"content": remainder.lstrip()}
	        return dict(message)
No results found