import asyncio
import base64
import datetime
import io
import logging

import aiohttp
import aiosqlite
import nltk
import weaviate
from nltk.chunk import ne_chunk
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
from PIL import Image

# Download the NLTK resources needed for tokenizing, POS tagging, and
# named-entity chunking.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Placeholder endpoint; replace with the URL of your Weaviate instance.
weaviate_client = weaviate.Client("http://[WEAVIATE_INSTANCE_URL]")
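
# fetch_relevant_info() below reads a text property named "content" from the
# query results. A minimal schema sketch for that (the class name "Document"
# is an assumption, not part of the original gist):
#
#     weaviate_client.schema.create_class({
#         "class": "Document",
#         "properties": [{"name": "content", "dataType": ["text"]}],
#     })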

def encode_image(image_path):
    """Load an image from disk and return it as a base64-encoded JPEG string."""
    with Image.open(image_path) as img:
        # JPEG has no alpha channel, so convert (e.g. RGBA PNGs) before saving.
        img = img.convert("RGB")
        buffered = io.BytesIO()
        img.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode()

async def store_data_in_db(agent, caption_analysis, timestamp):
    """Persist one agent's analysis to a local SQLite database."""
    async with aiosqlite.connect("image_analysis.db") as db:
        await db.execute("""
            CREATE TABLE IF NOT EXISTS agent_analysis (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                agent TEXT,
                analysis TEXT,
                timestamp TEXT
            )
        """)
        await db.execute(
            "INSERT INTO agent_analysis (agent, analysis, timestamp) VALUES (?, ?, ?)",
            (agent, caption_analysis, timestamp),
        )
        await db.commit()
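
# The stored rows can be read back later; a minimal sketch using the same
# aiosqlite API (table and column names are those created above):
#
#     async with aiosqlite.connect("image_analysis.db") as db:
#         async with db.execute(
#                 "SELECT agent, analysis, timestamp FROM agent_analysis") as cur:
#             rows = await cur.fetchall()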

def process_response_with_nltk(response_text):
    """Tokenize, POS-tag, and NE-chunk the response, then flatten back to text."""
    tokens = word_tokenize(response_text)
    tags = pos_tag(tokens)
    tree = ne_chunk(tags)
    # tree.leaves() yields (word, tag) pairs whether or not a token sits inside
    # a named-entity subtree; keep only the words.
    return ' '.join(word for word, _tag in tree.leaves())

def determine_loop_count(caption_analysis):
    # Scale the number of debate rounds with the length of the analysis so far,
    # clamped between 3 and 10 (roughly one round per 20 words).
    return min(10, max(3, len(caption_analysis.split()) // 20))
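
# Example: a 120-word collective analysis yields 120 // 20 = 6 rounds, a
# 30-word one clamps up to the floor of 3, and 200 words or more caps at 10.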

class IntermodalChunkGenerator:
    """Splits long text into chunks and enriches each with Weaviate context."""

    def __init__(self, max_chunk_size=1250):
        self.max_chunk_size = max_chunk_size

    def fetch_relevant_info(self, chunk):
        try:
            # weaviate.Client.query.raw() expects a GraphQL query string, not
            # a dict. "Document" and "content" are assumed schema names (see
            # the schema sketch above); adjust them to match your instance.
            escaped = chunk.replace('"', '\\"')
            gql = ('{ Get { Document(nearText: {concepts: ["%s"], '
                   'certainty: 0.7}) { content } } }' % escaped)
            response = weaviate_client.query.raw(gql)
            relevant_data = []
            if 'data' in response and 'Get' in response['data']:
                for _, items in response['data']['Get'].items():
                    relevant_data.extend(item.get('content', '') for item in items)
            return ' '.join(filter(None, relevant_data))
        except Exception as e:
            logger.error(f"Error fetching from Weaviate: {e}")
            return ""

    def process_chunk(self, chunk):
        # Prepend any related context retrieved from Weaviate to the chunk.
        relevant_info = self.fetch_relevant_info(chunk)
        return f"{relevant_info} {chunk}"

    def generate(self, input_text):
        # Split the input into fixed-size chunks, enrich each, and rejoin.
        prompt_chunks = [input_text[i:i + self.max_chunk_size]
                         for i in range(0, len(input_text), self.max_chunk_size)]
        return ''.join(self.process_chunk(chunk) for chunk in prompt_chunks)
    async def ask_gpt_for_visual_aid(self, api_key, image_data):
        """Ask gpt-3.5-turbo for a textual "visual aid" for the image.

        Note: gpt-3.5-turbo is a text-only model, so the base64 image data is
        passed inside the prompt as plain text, as in the original gist.
        """
        try:
            async with aiohttp.ClientSession() as session:
                headers = {"Authorization": f"Bearer {api_key}"}
                payload = {
                    "model": "gpt-3.5-turbo",
                    "messages": [
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": f"Analyze this image: {image_data}"}
                    ]
                }
                async with session.post("https://api.openai.com/v1/chat/completions",
                                        headers=headers, json=payload) as response:
                    response_data = await response.json()
                    return response_data["choices"][0]["message"]["content"].strip()
        except Exception as e:
            logger.error(f"Error during GPT request: {e}")
            return ""

async def gpt4v_call_image_to_caption(api_key, image_path, caption_0, caption_1):
    """Have three persona agents compare two candidate captions via GPT-4V."""
    base64_image = encode_image(image_path)
    agents = ["Nova Starlight", "Zara Skye", "Zephyr Storm"]
    collective_decision = ""
    initial_round = True
    loop_count = 3
    chunk_generator = IntermodalChunkGenerator()
    while loop_count > 0:
        for agent in agents:
            agent_prompt = {
                "Nova Starlight": "Analyze the image with confidence and wit. Identify key elements and compare the captions with a bold and clever explanation.",
                "Zara Skye": "Approach the image with curiosity and analytical depth. Ask probing questions and provide a thoughtful comparison of the captions.",
                "Zephyr Storm": "Use your creativity and playfulness to interpret the image. Come up with imaginative and witty observations, and compare the captions in a fun way."
            }[agent]
            # Generate a textual visual aid using GPT-3.5 Turbo.
            visual_aid = await chunk_generator.ask_gpt_for_visual_aid(api_key, base64_image)
            # Combine the agent persona, both candidate captions, and the visual aid.
            prompt = "\n".join([
                agent_prompt,
                "A. " + caption_0.strip(),
                "B. " + caption_1.strip(),
                "Visual Aid: " + visual_aid,
                "Image: "
            ])
            async with aiohttp.ClientSession() as session:
                headers = {"Content-Type": "application/json",
                           "Authorization": f"Bearer {api_key}"}
                payload = {
                    "model": "gpt-4-vision-preview",
                    "messages": [{
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {"type": "image_url",
                             "image_url": {"url": f"data:image/jpeg;base64,{base64_image}",
                                           "detail": "high"}}
                        ]
                    }],
                    "max_tokens": 200
                }
                async with session.post("https://api.openai.com/v1/chat/completions",
                                        headers=headers, json=payload) as response:
                    response_data = await response.json()
                    caption_analysis = response_data["choices"][0]["message"]["content"].strip()
                    processed_caption = process_response_with_nltk(caption_analysis)
                    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    await store_data_in_db(agent, processed_caption, timestamp)
                    collective_decision += f"{agent} [{timestamp}]'s Analysis: {processed_caption}\n"
        # After the first full round, rescale the remaining rounds to the
        # amount of analysis produced so far.
        if initial_round:
            loop_count = determine_loop_count(collective_decision)
            initial_round = False
        loop_count -= 1
    # Enrich the combined analyses with related context from Weaviate.
    final_decision = chunk_generator.generate(collective_decision)
    return final_decision

# Main execution
OAI_KEY = 'your-api-key'  # placeholder; supply a real OpenAI API key
asyncio.run(gpt4v_call_image_to_caption(OAI_KEY, 'path/to/image.jpg', 'Caption A', 'Caption B'))
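
# Running this end to end assumes (not shown in the gist): a reachable
# Weaviate instance at the URL configured above, an OpenAI API key with
# access to gpt-4-vision-preview, and a JPEG at the given image path.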