Skip to content

Instantly share code, notes, and snippets.

@graylan0
Last active December 3, 2023 22:29
Show Gist options
  • Save graylan0/6114ac940e7e6966cb379fc94681748b to your computer and use it in GitHub Desktop.
import base64
from PIL import Image
import io
import nltk
import aiosqlite
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
import asyncio
import datetime
import openai
import aiohttp
import weaviate
import logging
# One-time downloads of the NLTK data packages needed by word_tokenize /
# pos_tag / ne_chunk below (no-ops if the data is already present locally).
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
# Module-wide logging configuration.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Shared Weaviate client. "[WEAVIATE_INSTANCE_URL]" is a placeholder and must
# be replaced with a real instance URL before context fetching can work.
weaviate_client = weaviate.Client("http://[WEAVIATE_INSTANCE_URL]")
def encode_image(image_path):
    """Read an image file and return it as a base64-encoded JPEG string.

    Opens the image with Pillow, re-encodes it as JPEG in memory, and
    base64-encodes the resulting bytes.

    Args:
        image_path: Filesystem path to the image file.

    Returns:
        str: Base64 text of the JPEG-encoded image data.
    """
    with Image.open(image_path) as img:
        # JPEG cannot store an alpha channel or palette; convert so that
        # PNG/RGBA/P-mode inputs do not raise "cannot write mode ... as JPEG".
        if img.mode != "RGB":
            img = img.convert("RGB")
        buffered = io.BytesIO()
        img.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode()
async def store_data_in_db(agent, caption_analysis, timestamp):
    """Persist one agent's analysis to the local SQLite database.

    Ensures the `agent_analysis` table exists, then inserts a single row
    holding the agent name, its analysis text, and the timestamp.
    """
    create_sql = """
    CREATE TABLE IF NOT EXISTS agent_analysis (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    agent TEXT,
    analysis TEXT,
    timestamp TEXT
    )
    """
    insert_sql = "INSERT INTO agent_analysis (agent, analysis, timestamp) VALUES (?, ?, ?)"
    async with aiosqlite.connect("image_analysis.db") as db:
        await db.execute(create_sql)
        # Parameterized insert keeps arbitrary analysis text safe to store.
        await db.execute(insert_sql, (agent, caption_analysis, timestamp))
        await db.commit()
def process_response_with_nltk(response_text):
    """Tokenize, POS-tag, and NE-chunk text, then flatten back to a string.

    The previous implementation did ``' '.join(leaf)`` over the chunk tree's
    top level: for plain (word, tag) pairs that leaked POS tags into the
    output, and for named-entity ``Tree`` nodes it raised a TypeError
    (attempting to join tuples). This version extracts only the words.

    Args:
        response_text: Raw text to process.

    Returns:
        str: The tokens of `response_text`, space-joined, entities flattened.
    """
    tokens = word_tokenize(response_text)
    tags = pos_tag(tokens)
    chunks = ne_chunk(tags)
    words = []
    for node in chunks:
        if hasattr(node, "label"):
            # Named-entity subtree: its leaves are (word, tag) pairs.
            words.extend(word for word, _tag in node.leaves())
        else:
            # Plain (word, tag) pair outside any entity.
            words.append(node[0])
    return ' '.join(words)
def determine_loop_count(caption_analysis):
    """Derive the number of analysis rounds from the text's word count.

    Grants one round per ~20 words, clamped to the inclusive range [3, 10].
    """
    words_per_round = 20
    rounds = len(caption_analysis.split()) // words_per_round
    if rounds < 3:
        return 3
    if rounds > 10:
        return 10
    return rounds
class IntermodalChunkGenerator:
    """Splits long text into fixed-size chunks and enriches each chunk with
    related context fetched from the module's Weaviate vector store."""

    def __init__(self, max_chunk_size=1250):
        # Maximum number of characters per prompt chunk.
        self.max_chunk_size = max_chunk_size

    def fetch_relevant_info(self, chunk):
        """Query Weaviate for content semantically near `chunk`.

        Returns the matched items' `content` fields joined by spaces, or an
        empty string when the lookup fails or nothing matches.
        """
        try:
            query = {
                "query": {
                    "nearText": {
                        "concepts": [chunk],
                        "certainty": 0.7
                    }
                }
            }
            response = weaviate_client.query.raw(query)
            found = []
            if 'data' in response and 'Get' in response['data']:
                # Flatten results across every returned collection.
                for collection_items in response['data']['Get'].values():
                    found.extend(item.get('content', '') for item in collection_items)
            return ' '.join(filter(None, found))
        except Exception as e:
            # Best-effort enrichment: never let a store failure propagate.
            logger.error(f"Error fetching from Weaviate: {e}")
            return ""

    def process_chunk(self, chunk):
        """Return `chunk` prefixed with its related Weaviate context."""
        context = self.fetch_relevant_info(chunk)
        return f"{context} {chunk}"

    def generate(self, input_text):
        """Split `input_text` into chunks, enrich each, and concatenate."""
        size = self.max_chunk_size
        enriched = (
            self.process_chunk(input_text[start:start + size])
            for start in range(0, len(input_text), size)
        )
        return ''.join(enriched)

    async def ask_gpt_for_visual_aid(self, api_key, image_data):
        """Ask GPT-3.5 Turbo for a textual analysis of `image_data`.

        Returns the stripped reply content, or "" on any error.

        NOTE(review): gpt-3.5-turbo is a text-only model — the base64 image
        string is passed as plain prompt text, not interpreted as an image;
        confirm this is the intended behavior.
        """
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"Analyze this image: {image_data}"}
            ]
        }
        try:
            async with aiohttp.ClientSession() as session:
                headers = {"Authorization": f"Bearer {api_key}"}
                async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) as response:
                    response_data = await response.json()
            return response_data["choices"][0]["message"]["content"].strip()
        except Exception as e:
            logger.error(f"Error during GPT request: {e}")
            return ""
async def gpt4v_call_image_to_caption(api_key, image_path, caption_0, caption_1):
    """Run a multi-agent, multi-round comparison of two captions for an image.

    Three persona "agents" each query GPT-4 Vision about the image (with a
    GPT-3.5-generated textual "visual aid" prepended to the prompt); each
    response is processed with NLTK, stored in SQLite, and appended to a
    running transcript, which is finally passed through
    IntermodalChunkGenerator for Weaviate-based enrichment.

    Args:
        api_key: OpenAI API key used for both the GPT-3.5 and GPT-4V calls.
        image_path: Path to the image being captioned.
        caption_0: First candidate caption (labeled "A" in the prompt).
        caption_1: Second candidate caption (labeled "B" in the prompt).

    Returns:
        The chunk-generator-enriched concatenation of all agents' analyses.
    """
    base64_image = encode_image(image_path)
    agents = ["Nova Starlight", "Zara Skye", "Zephyr Storm"]
    collective_decision = ""
    initial_round = True
    loop_count = 3  # provisional; recomputed after the first round (see below)
    chunk_generator = IntermodalChunkGenerator()
    while loop_count > 0:
        for agent in agents:
            # Persona-specific instructions for comparing the two captions.
            agent_prompt = {
                "Nova Starlight": "Analyze the image with confidence and wit. Identify key elements and compare the captions with a bold and clever explanation.",
                "Zara Skye": "Approach the image with curiosity and analytical depth. Ask probing questions and provide a thoughtful comparison of the captions.",
                "Zephyr Storm": "Use your creativity and playfulness to interpret the image. Come up with imaginative and witty observations, and compare the captions in a fun way."
            }[agent]
            # Generate visual aid using GPT-3.5 Turbo
            visual_aid = await chunk_generator.ask_gpt_for_visual_aid(api_key, base64_image)
            # Combine the agent prompt with the visual aid
            prompt = "\n".join([agent_prompt, "A. " + caption_0.strip(), "B. " + caption_1.strip(), "Visual Aid: " + visual_aid, "Image: "])
            async with aiohttp.ClientSession() as session:
                headers = {"Content-Type": "application/json", "Authorization": "Bearer {}".format(api_key)}
                # GPT-4 Vision request: the prompt text plus the image as an
                # inline base64 data URL at "high" detail.
                payload = {
                    "model": "gpt-4-vision-preview",
                    "messages": [{"role": "user", "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,{}".format(base64_image), "detail": "high"}}]}],
                    "max_tokens": 200
                }
                async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) as response:
                    response_data = await response.json()
                    caption_analysis = response_data["choices"][0]["message"]["content"].strip()
                    processed_caption = process_response_with_nltk(caption_analysis)
                    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    await store_data_in_db(agent, processed_caption, timestamp)
                    collective_decision += f"{agent} [{timestamp}]'s Analysis: {processed_caption}\n"
        # After the first full round, scale the remaining rounds to the amount
        # of analysis text produced so far (determine_loop_count clamps 3..10).
        if initial_round:
            loop_count = determine_loop_count(collective_decision)
            initial_round = False
        loop_count -= 1
    # Enrich the combined transcript with Weaviate context before returning.
    final_decision = chunk_generator.generate(collective_decision)
    return final_decision
# Main execution
OAI_KEY = 'your-api-key'  # NOTE(review): placeholder — supply a real key before running

if __name__ == "__main__":
    # Guard so importing this module does not immediately fire off the
    # full (network-calling) caption-analysis pipeline.
    asyncio.run(gpt4v_call_image_to_caption(OAI_KEY, 'path/to/image.jpg', 'Caption A', 'Caption B'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment