JayK31 · February 3, 2025 15:29
diff --git a/Get product from LLM b/Get product from LLM

 def get_product_from_llm(frames):
     """Ask the chat model to name the main product shown in *frames*.

     Builds a single user message whose content is a text instruction
     followed by one ``{"image": ..., "resize": 350}`` entry per element of
     *frames*, sends it to ``gpt-4o`` with a 200-token cap, and returns the
     completion object from ``openai_client`` unchanged (the caller in this
     file reads ``.choices[0].message.content`` from it).
     """
     instruction = (
         "In 4 words or fewer, identify the main product in this advertisement. "
         "Be specific (e.g., car make and model)."
     )
     # NOTE(review): this image-part shape is not the "image_url" content part
     # described in the Chat Completions reference -- confirm it is accepted.
     image_parts = [{"image": frame, "resize": 350} for frame in frames]
     user_message = {"role": "user", "content": [instruction, *image_parts]}
     return openai_client.chat.completions.create(
         model="gpt-4o",
         messages=[user_message],
         max_tokens=200,
     )

 def get_frames_from_video(vid, target_frames=50):
     """Sample evenly spaced frames from *vid* as base64-encoded JPEG strings.

     Args:
         vid: A capture object exposing ``get``, ``isOpened`` and ``read``
             the way ``cv2.VideoCapture`` does. It is read sequentially from
             its current position and is not released here.
         target_frames: Number of sample positions spread across the frame
             count reported by the capture.

     Returns:
         A list of base64 ``str`` values (UTF-8 decoded), one per sampled
         frame, in frame order. The list is shorter than *target_frames*
         when several sample positions collapse onto the same frame index
         (e.g. the reported frame count is smaller than *target_frames*, or
         is 0, in which case only the first frame is sampled), when the
         stream ends early, or when a frame cannot be JPEG-encoded.
     """
     total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
     # A set gives O(1) membership checks inside the per-frame loop.
     wanted_indices = {
         int(i * total_frames / target_frames) for i in range(target_frames)
     }
     if not wanted_indices:
         return []

     last_wanted = max(wanted_indices)
     base64_frames = []
     frame_count = 0
     # Stop once the last sample position is passed: decoding the remaining
     # tail of the video cannot add anything to the result.
     while frame_count <= last_wanted and vid.isOpened():
         success, frame = vid.read()
         if not success:
             break
         if frame_count in wanted_indices:
             encoded_ok, buffer = cv2.imencode(".jpg", frame)
             # Skip frames the encoder rejects instead of emitting bad data.
             if encoded_ok:
                 base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
         frame_count += 1
     return base64_frames

 ...

 # Open the video, sample frames from it, and ask the LLM which product it shows.
 vid = cv2.VideoCapture(out_file)
 try:
     base64_frames = get_frames_from_video(vid, target_frames=50)
 finally:
     # Release the capture handle even if frame extraction raises.
     vid.release()
 llm_result = get_product_from_llm(base64_frames)
 # The product name is the text of the first returned choice.
 product = llm_result.choices[0].message.content

	def get_product_from_llm(frames):
	    """Ask the chat model to name the main product shown in *frames*.

	    Builds a single user message whose content is a text instruction
	    followed by one ``{"image": ..., "resize": 350}`` entry per element of
	    *frames*, sends it to ``gpt-4o`` with a 200-token cap, and returns the
	    completion object from ``openai_client`` unchanged (the caller in this
	    file reads ``.choices[0].message.content`` from it).
	    """
	    instruction = (
	        "In 4 words or fewer, identify the main product in this advertisement. "
	        "Be specific (e.g., car make and model)."
	    )
	    # NOTE(review): this image-part shape is not the "image_url" content part
	    # described in the Chat Completions reference -- confirm it is accepted.
	    image_parts = [{"image": frame, "resize": 350} for frame in frames]
	    user_message = {"role": "user", "content": [instruction, *image_parts]}
	    return openai_client.chat.completions.create(
	        model="gpt-4o",
	        messages=[user_message],
	        max_tokens=200,
	    )

	def get_frames_from_video(vid, target_frames=50):
	    """Sample evenly spaced frames from *vid* as base64-encoded JPEG strings.

	    Args:
	        vid: A capture object exposing ``get``, ``isOpened`` and ``read``
	            the way ``cv2.VideoCapture`` does. It is read sequentially from
	            its current position and is not released here.
	        target_frames: Number of sample positions spread across the frame
	            count reported by the capture.

	    Returns:
	        A list of base64 ``str`` values (UTF-8 decoded), one per sampled
	        frame, in frame order. The list is shorter than *target_frames*
	        when several sample positions collapse onto the same frame index
	        (e.g. the reported frame count is smaller than *target_frames*, or
	        is 0, in which case only the first frame is sampled), when the
	        stream ends early, or when a frame cannot be JPEG-encoded.
	    """
	    total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
	    # A set gives O(1) membership checks inside the per-frame loop.
	    wanted_indices = {
	        int(i * total_frames / target_frames) for i in range(target_frames)
	    }
	    if not wanted_indices:
	        return []

	    last_wanted = max(wanted_indices)
	    base64_frames = []
	    frame_count = 0
	    # Stop once the last sample position is passed: decoding the remaining
	    # tail of the video cannot add anything to the result.
	    while frame_count <= last_wanted and vid.isOpened():
	        success, frame = vid.read()
	        if not success:
	            break
	        if frame_count in wanted_indices:
	            encoded_ok, buffer = cv2.imencode(".jpg", frame)
	            # Skip frames the encoder rejects instead of emitting bad data.
	            if encoded_ok:
	                base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
	        frame_count += 1
	    return base64_frames

	...

	# Open the video, sample frames from it, and ask the LLM which product it shows.
	vid = cv2.VideoCapture(out_file)
	try:
	    base64_frames = get_frames_from_video(vid, target_frames=50)
	finally:
	    # Release the capture handle even if frame extraction raises.
	    vid.release()
	llm_result = get_product_from_llm(base64_frames)
	# The product name is the text of the first returned choice.
	product = llm_result.choices[0].message.content