Doing image analysis with LangChain agents
# Got inspiration from here: https://github.com/langchain-ai/langchain/discussions/20820
# This snippet lives inside an agent class; self.llm is assumed to be a
# vision-capable chat model (e.g. GPT-4o).
from langchain.agents.output_parsers import OpenAIToolsAgentOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

# Human message with two parts: the text question and the image.
# "{encoded_image_url}" may be a regular URL or a base64 data URI.
human = HumanMessagePromptTemplate.from_template(
    template=[
        {"type": "text", "text": "{input}"},
        {
            "type": "image_url",
            "image_url": "{encoded_image_url}",
        },
    ]
)

# Full chat prompt: a templated system message followed by the multimodal
# human message defined above.
self.image_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "{system_prompt}"),
        human,
    ]
)

# LCEL chain: map the input dict onto the prompt variables, call the model,
# then parse any OpenAI tool calls out of the response.
self.lang_agent = (
    {
        "system_prompt": lambda x: x["system_prompt"],
        "input": lambda x: x["input"],
        "encoded_image_url": lambda x: x["encoded_image_url"],
    }
    | self.image_prompt
    | self.llm
    | OpenAIToolsAgentOutputParser()
)
# When you run the agent you have to supply "encoded_image_url" (a URL or b64 data URI).
# Note: it seems you have to provide the image *every time* you ask a question.
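
For reference, here is a minimal usage sketch under a few assumptions: the chain above is built as a plain lang_agent variable (dropping the self. prefix for a standalone script), the model is a vision-capable chat model such as ChatOpenAI(model="gpt-4o"), and "chart.png" plus the prompt strings are placeholder values.

import base64

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o")  # assumption: any vision-capable chat model works

# Package a local image as a base64 data URI (placeholder file name).
with open("chart.png", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

# Supply the image alongside the question, as noted above.
result = lang_agent.invoke(
    {
        "system_prompt": "You are a helpful image analyst.",
        "input": "What does this image show?",
        "encoded_image_url": f"data:image/png;base64,{b64}",
    }
)
# When the model makes no tool calls, OpenAIToolsAgentOutputParser returns an
# AgentFinish, so the answer text is in result.return_values["output"].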