Doing image analysis with LangChain agents
# Got inspiration from here: https://github.com/langchain-ai/langchain/discussions/20820
# This snippet lives inside an agent class; self.llm is assumed to be a
# vision-capable chat model (e.g. GPT-4o).
from langchain.agents.output_parsers import OpenAIToolsAgentOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

# Human message with two parts: the text question and the image.
# "{encoded_image_url}" may be a regular URL or a base64 data URI.
human = HumanMessagePromptTemplate.from_template(
    template=[
        {"type": "text", "text": "{input}"},
        {
            "type": "image_url",
            "image_url": "{encoded_image_url}",
        },
    ]
)

# Full chat prompt: a templated system message followed by the multimodal
# human message defined above.
self.image_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "{system_prompt}"),
        human,
    ]
)

# LCEL chain: map the input dict onto the prompt variables, call the model,
# then parse any OpenAI tool calls out of the response.
self.lang_agent = (
    {
        "system_prompt": lambda x: x["system_prompt"],
        "input": lambda x: x["input"],
        "encoded_image_url": lambda x: x["encoded_image_url"],
    }
    | self.image_prompt
    | self.llm
    | OpenAIToolsAgentOutputParser()
)
# When you run the agent you have to supply "encoded_image_url" (a URL or b64 data URI).
# Note: it seems you have to provide the image *every time* you ask a question.
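
For reference, here is a minimal usage sketch under a few assumptions: the chain above is built as a plain lang_agent variable (dropping the self. prefix for a standalone script), the model is a vision-capable chat model such as ChatOpenAI(model="gpt-4o"), and "chart.png" plus the prompt strings are placeholder values.

import base64

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o")  # assumption: any vision-capable chat model works

# Package a local image as a base64 data URI (placeholder file name).
with open("chart.png", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

# Supply the image alongside the question, as noted above.
result = lang_agent.invoke(
    {
        "system_prompt": "You are a helpful image analyst.",
        "input": "What does this image show?",
        "encoded_image_url": f"data:image/png;base64,{b64}",
    }
)
# When the model makes no tool calls, OpenAIToolsAgentOutputParser returns an
# AgentFinish, so the answer text is in result.return_values["output"].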