Last active
February 3, 2025 15:29
-
-
Save JayK31/31262500c62c17726479185a222065ae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_product_from_llm(frames):
    """Ask the chat model to name the main product shown in a set of ad frames.

    Builds a single user message made of a short text instruction followed by
    one image part per entry in ``frames``, and sends it through the
    module-level ``openai_client``.

    Args:
        frames: Iterable of image payloads, one per frame. The caller in this
            file passes the base64 strings produced by
            ``get_frames_from_video``.

    Returns:
        The raw chat-completion response object; the caller reads the answer
        from ``choices[0].message.content``.
    """
    instruction = "In 4 words or fewer, identify the main product in this advertisement. Be specific (e.g., car make and model)."
    # Each frame becomes its own image part, placed after the text instruction.
    image_parts = [{"image": frame, "resize": 350} for frame in frames]
    messages = [
        {
            "role": "user",
            "content": [instruction, *image_parts],
        },
    ]
    return openai_client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=200,
    )
def get_frames_from_video(vid, target_frames=50):
    """Sample evenly spaced frames from a video as base64-encoded JPEG strings.

    The frame positions are spread uniformly over the frame count reported by
    the capture. Frames are read sequentially and only the selected positions
    are JPEG-encoded and kept.

    Args:
        vid: An opened ``cv2.VideoCapture``-like object supporting ``get``,
            ``isOpened`` and ``read``. It is not released here; the caller
            owns it.
        target_frames: Maximum number of frames to return. Fewer are returned
            when the video is shorter than this (duplicate positions collapse)
            or when reading stops early.

    Returns:
        A list of base64 text strings (UTF-8 decoded), in playback order.
        Empty when ``target_frames`` is not positive or no frame can be read.
    """
    # Nothing to sample: avoid decoding the whole video just to return [].
    if target_frames <= 0:
        return []

    total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    # A set gives O(1) membership per decoded frame and collapses duplicates
    # that appear when the video has fewer frames than target_frames.
    wanted = {
        int(i * total_frames / target_frames) for i in range(target_frames)
    }
    last_wanted = max(wanted)

    base64_frames = []
    frame_count = 0
    # Stop as soon as the last wanted position has been consumed instead of
    # decoding the remainder of the video.
    while frame_count <= last_wanted and vid.isOpened():
        success, frame = vid.read()
        if not success:
            break
        if frame_count in wanted:
            encoded, buffer = cv2.imencode(".jpg", frame)
            # Skip frames the encoder rejected rather than appending garbage.
            if encoded:
                base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
        frame_count += 1
    return base64_frames
| ... | |
# Open the video at out_file, sample frames from it, and ask the model which
# product the frames advertise.
vid = cv2.VideoCapture(out_file)
try:
    base64_frames = get_frames_from_video(vid, target_frames=50)
finally:
    # Release the capture handle even if frame extraction raises.
    vid.release()
llm_result = get_product_from_llm(base64_frames)
# The product label is the text content of the first returned choice.
product = llm_result.choices[0].message.content
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment