Skip to content

Instantly share code, notes, and snippets.

@graylan0
Last active December 3, 2023 22:29
Show Gist options
  • Save graylan0/6114ac940e7e6966cb379fc94681748b to your computer and use it in GitHub Desktop.
import base64
from PIL import Image
import io
import nltk
import aiosqlite
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
import asyncio
import datetime
import openai
import aiohttp
import weaviate
import logging
# One-time downloads of the NLTK data packages needed by word_tokenize /
# pos_tag / ne_chunk below (no-ops if the data is already present locally).
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
# Module-wide logging configuration.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Shared Weaviate client. "[WEAVIATE_INSTANCE_URL]" is a placeholder and must
# be replaced with a real instance URL before context fetching can work.
weaviate_client = weaviate.Client("http://[WEAVIATE_INSTANCE_URL]")
def encode_image(image_path):
    """Read an image file and return it as a base64-encoded JPEG string.

    Opens the image with Pillow, re-encodes it as JPEG in memory, and
    base64-encodes the resulting bytes.

    Args:
        image_path: Filesystem path to the image file.

    Returns:
        str: Base64 text of the JPEG-encoded image data.
    """
    with Image.open(image_path) as img:
        # JPEG cannot store an alpha channel or palette; convert so that
        # PNG/RGBA/P-mode inputs do not raise "cannot write mode ... as JPEG".
        if img.mode != "RGB":
            img = img.convert("RGB")
        buffered = io.BytesIO()
        img.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode()
async def store_data_in_db(agent, caption_analysis, timestamp):
    """Persist one agent's analysis to the local SQLite database.

    Ensures the `agent_analysis` table exists, then inserts a single row
    holding the agent name, its analysis text, and the timestamp.
    """
    create_sql = """
    CREATE TABLE IF NOT EXISTS agent_analysis (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    agent TEXT,
    analysis TEXT,
    timestamp TEXT
    )
    """
    insert_sql = "INSERT INTO agent_analysis (agent, analysis, timestamp) VALUES (?, ?, ?)"
    async with aiosqlite.connect("image_analysis.db") as db:
        await db.execute(create_sql)
        # Parameterized insert keeps arbitrary analysis text safe to store.
        await db.execute(insert_sql, (agent, caption_analysis, timestamp))
        await db.commit()
def process_response_with_nltk(response_text):
    """Tokenize, POS-tag, and NE-chunk text, then flatten back to a string.

    The previous implementation did ``' '.join(leaf)`` over the chunk tree's
    top level: for plain (word, tag) pairs that leaked POS tags into the
    output, and for named-entity ``Tree`` nodes it raised a TypeError
    (attempting to join tuples). This version extracts only the words.

    Args:
        response_text: Raw text to process.

    Returns:
        str: The tokens of `response_text`, space-joined, entities flattened.
    """
    tokens = word_tokenize(response_text)
    tags = pos_tag(tokens)
    chunks = ne_chunk(tags)
    words = []
    for node in chunks:
        if hasattr(node, "label"):
            # Named-entity subtree: its leaves are (word, tag) pairs.
            words.extend(word for word, _tag in node.leaves())
        else:
            # Plain (word, tag) pair outside any entity.
            words.append(node[0])
    return ' '.join(words)
def determine_loop_count(caption_analysis):
    """Derive the number of analysis rounds from the text's word count.

    Grants one round per ~20 words, clamped to the inclusive range [3, 10].
    """
    words_per_round = 20
    rounds = len(caption_analysis.split()) // words_per_round
    if rounds < 3:
        return 3
    if rounds > 10:
        return 10
    return rounds
class IntermodalChunkGenerator:
    """Splits long text into fixed-size chunks and enriches each chunk with
    related context fetched from the module's Weaviate vector store."""

    def __init__(self, max_chunk_size=1250):
        # Maximum number of characters per prompt chunk.
        self.max_chunk_size = max_chunk_size

    def fetch_relevant_info(self, chunk):
        """Query Weaviate for content semantically near `chunk`.

        Returns the matched items' `content` fields joined by spaces, or an
        empty string when the lookup fails or nothing matches.
        """
        try:
            query = {
                "query": {
                    "nearText": {
                        "concepts": [chunk],
                        "certainty": 0.7
                    }
                }
            }
            response = weaviate_client.query.raw(query)
            found = []
            if 'data' in response and 'Get' in response['data']:
                # Flatten results across every returned collection.
                for collection_items in response['data']['Get'].values():
                    found.extend(item.get('content', '') for item in collection_items)
            return ' '.join(filter(None, found))
        except Exception as e:
            # Best-effort enrichment: never let a store failure propagate.
            logger.error(f"Error fetching from Weaviate: {e}")
            return ""

    def process_chunk(self, chunk):
        """Return `chunk` prefixed with its related Weaviate context."""
        context = self.fetch_relevant_info(chunk)
        return f"{context} {chunk}"

    def generate(self, input_text):
        """Split `input_text` into chunks, enrich each, and concatenate."""
        size = self.max_chunk_size
        enriched = (
            self.process_chunk(input_text[start:start + size])
            for start in range(0, len(input_text), size)
        )
        return ''.join(enriched)

    async def ask_gpt_for_visual_aid(self, api_key, image_data):
        """Ask GPT-3.5 Turbo for a textual analysis of `image_data`.

        Returns the stripped reply content, or "" on any error.

        NOTE(review): gpt-3.5-turbo is a text-only model — the base64 image
        string is passed as plain prompt text, not interpreted as an image;
        confirm this is the intended behavior.
        """
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"Analyze this image: {image_data}"}
            ]
        }
        try:
            async with aiohttp.ClientSession() as session:
                headers = {"Authorization": f"Bearer {api_key}"}
                async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) as response:
                    response_data = await response.json()
            return response_data["choices"][0]["message"]["content"].strip()
        except Exception as e:
            logger.error(f"Error during GPT request: {e}")
            return ""
async def gpt4v_call_image_to_caption(api_key, image_path, caption_0, caption_1):
    """Run a multi-agent, multi-round comparison of two captions for an image.

    Three persona "agents" each query GPT-4 Vision about the image (with a
    GPT-3.5-generated textual "visual aid" prepended to the prompt); each
    response is processed with NLTK, stored in SQLite, and appended to a
    running transcript, which is finally passed through
    IntermodalChunkGenerator for Weaviate-based enrichment.

    Args:
        api_key: OpenAI API key used for both the GPT-3.5 and GPT-4V calls.
        image_path: Path to the image being captioned.
        caption_0: First candidate caption (labeled "A" in the prompt).
        caption_1: Second candidate caption (labeled "B" in the prompt).

    Returns:
        The chunk-generator-enriched concatenation of all agents' analyses.
    """
    base64_image = encode_image(image_path)
    agents = ["Nova Starlight", "Zara Skye", "Zephyr Storm"]
    collective_decision = ""
    initial_round = True
    loop_count = 3  # provisional; recomputed after the first round (see below)
    chunk_generator = IntermodalChunkGenerator()
    while loop_count > 0:
        for agent in agents:
            # Persona-specific instructions for comparing the two captions.
            agent_prompt = {
                "Nova Starlight": "Analyze the image with confidence and wit. Identify key elements and compare the captions with a bold and clever explanation.",
                "Zara Skye": "Approach the image with curiosity and analytical depth. Ask probing questions and provide a thoughtful comparison of the captions.",
                "Zephyr Storm": "Use your creativity and playfulness to interpret the image. Come up with imaginative and witty observations, and compare the captions in a fun way."
            }[agent]
            # Generate visual aid using GPT-3.5 Turbo
            visual_aid = await chunk_generator.ask_gpt_for_visual_aid(api_key, base64_image)
            # Combine the agent prompt with the visual aid
            prompt = "\n".join([agent_prompt, "A. " + caption_0.strip(), "B. " + caption_1.strip(), "Visual Aid: " + visual_aid, "Image: "])
            async with aiohttp.ClientSession() as session:
                headers = {"Content-Type": "application/json", "Authorization": "Bearer {}".format(api_key)}
                # GPT-4 Vision request: the prompt text plus the image as an
                # inline base64 data URL at "high" detail.
                payload = {
                    "model": "gpt-4-vision-preview",
                    "messages": [{"role": "user", "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,{}".format(base64_image), "detail": "high"}}]}],
                    "max_tokens": 200
                }
                async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) as response:
                    response_data = await response.json()
                    caption_analysis = response_data["choices"][0]["message"]["content"].strip()
                    processed_caption = process_response_with_nltk(caption_analysis)
                    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    await store_data_in_db(agent, processed_caption, timestamp)
                    collective_decision += f"{agent} [{timestamp}]'s Analysis: {processed_caption}\n"
        # After the first full round, scale the remaining rounds to the amount
        # of analysis text produced so far (determine_loop_count clamps 3..10).
        if initial_round:
            loop_count = determine_loop_count(collective_decision)
            initial_round = False
        loop_count -= 1
    # Enrich the combined transcript with Weaviate context before returning.
    final_decision = chunk_generator.generate(collective_decision)
    return final_decision
# Main execution
OAI_KEY = 'your-api-key'  # NOTE(review): placeholder — supply a real key before running

if __name__ == "__main__":
    # Guard so importing this module does not immediately fire off the
    # full (network-calling) caption-analysis pipeline.
    asyncio.run(gpt4v_call_image_to_caption(OAI_KEY, 'path/to/image.jpg', 'Caption A', 'Caption B'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment