Recursively applies the same edit prompt to an image and then creates an MP4 video from the resulting frames.
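A typical invocation (the script filename recursive_edit.py is illustrative, not part of the gist) might be:

python recursive_edit.py input.jpg --iterations 10 --fps 4 --output drift.mp4

The script needs the openai, pillow, python-dotenv, and requests packages, an OPENAI_API_KEY in the environment or a .env file, and an ffmpeg binary on the PATH.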
#!/usr/bin/env python3
import os
import time
import argparse
import shutil
import requests
import subprocess
import io
import base64
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image
# Load environment variables and check for the API key before creating the
# client (the OpenAI constructor raises on a missing key, so checking first
# gives a friendlier error message)
load_dotenv()
API_KEY = os.getenv("OPENAI_API_KEY")
if not API_KEY:
    print("Error: OPENAI_API_KEY not set in environment or .env file.")
    exit(1)
client = OpenAI(api_key=API_KEY)
# Parse command line arguments
parser = argparse.ArgumentParser(description="Use images.edit(...) with no mask, by making the entire image transparent.")
parser.add_argument("image", help="Path to the input image (any format).")
parser.add_argument("--prompt", default="Recreate the image as closely to the original as possible, without changing anything.",
                    help="Edit prompt for each iteration.")
parser.add_argument("--iterations", type=int, default=5,
                    help="How many frames to generate.")
parser.add_argument("--fps", type=int, default=2,
                    help="Frames per second for the final video.")
parser.add_argument("--output", default="output_video.mp4",
                    help="Output video filename.")
parser.add_argument("--model", default="gpt-image-1",
                    help="Name/ID of the image-edit model to use: gpt-image-1 or dall-e-2 (dall-e-3 does not support the edit endpoint).")
parser.add_argument("--size", default="1024x1024",
                    help="Size for each generated image (e.g. '1024x1024').")
args = parser.parse_args()
# Set up frames directory
frames_dir = Path("output_frames")
if frames_dir.exists():
    shutil.rmtree(frames_dir)
frames_dir.mkdir()
# Convert the user's image to RGBA with alpha=0
base_name = Path(args.image).stem
transparent_png = frames_dir / f"{base_name}_transparent.png"

# Make the image fully transparent
try:
    with Image.open(Path(args.image)).convert("RGBA") as img:
        # H.264 with yuv420p requires even dimensions, so round odd sizes up
        w, h = img.size
        w += w % 2
        h += h % 2
        if (w, h) != img.size:
            img = img.resize((w, h), Image.LANCZOS)
        # Zero the alpha channel everywhere: images.edit uses the image's own
        # transparency as the mask when no mask is supplied, so a fully
        # transparent image lets the model regenerate every pixel.
        img.putalpha(0)
        img.save(transparent_png, "PNG")
except Exception as e:
    print(f"Failed converting to transparent RGBA: {e}")
    exit(1)
# Copy the transparent image as the first frame
first_frame = frames_dir / "frame_0000.png"
shutil.copy2(transparent_png, first_frame)
current_frame = first_frame
def save_with_even_dims(image_bytes, path):
    """Decode image bytes, pad odd dimensions to even, and save as PNG."""
    with Image.open(io.BytesIO(image_bytes)) as img:
        w, h = img.size
        w += w % 2
        h += h % 2
        if (w, h) != img.size:
            img = img.resize((w, h), Image.LANCZOS)
        img.save(path, "PNG")

# Generate frames
for i in range(1, args.iterations + 1):
    next_frame = frames_dir / f"frame_{i:04d}.png"
    print(f"Generating frame {i}/{args.iterations} ...")

    # Edit the image with the OpenAI API
    try:
        with open(current_frame, "rb") as base_file:
            response = client.images.edit(
                model=args.model,
                image=base_file,
                prompt=args.prompt,
                n=1,
                size=args.size,
            )

        # Process the response
        if not response or not hasattr(response, 'data') or not response.data:
            print("Error: API response is missing data")
            exit(1)
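        # gpt-image-1 returns images inline as base64 (b64_json); dall-e-2
        # returns a URL by default, so both response shapes are handled below.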
        if getattr(response.data[0], 'b64_json', None):
            save_with_even_dims(base64.b64decode(response.data[0].b64_json), next_frame)
        # Fall back to a URL if b64_json is not available
        elif getattr(response.data[0], 'url', None):
            r = requests.get(response.data[0].url)
            r.raise_for_status()
            save_with_even_dims(r.content, next_frame)
        else:
            print("Error: No usable image data in response")
            exit(1)
    except Exception as e:
        print(f"Error: Failed to generate frame: {e}")
        exit(1)

    # Update current frame for the next iteration
    current_frame = next_frame
    time.sleep(1)  # small delay to avoid rate limiting
# All frames ready; read the first frame to determine the video dimensions
try:
    with Image.open(first_frame) as im0:
        w, h = im0.size
        w += w % 2
        h += h % 2
except Exception as e:
    print(f"Error: Failed to read the first frame to determine dimensions: {e}")
    exit(1)

# Build ffmpeg command
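# -framerate sets the input frame rate; the fps=24 filter then duplicates
# frames so the output plays at a standard rate. -g 1, -keyint_min 1 and
# -sc_threshold 0 make every frame a keyframe, and yuv420p with even
# dimensions keeps the H.264 output compatible with most players.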
cmd = [
    "ffmpeg",
    "-y",
    "-framerate", str(args.fps),
    "-i", f"{frames_dir}/frame_%04d.png",
    "-vf", f"scale={w}:{h},fps=24",
    "-c:v", "libx264",
    "-profile:v", "main",
    "-preset", "medium",
    "-crf", "23",
    "-pix_fmt", "yuv420p",
    "-g", "1",
    "-keyint_min", "1",
    "-sc_threshold", "0",
    args.output,
]
print("Running ffmpeg:", " ".join(cmd)) | |
try: | |
subprocess.run(cmd, check=True) | |
print(f"Video created: {args.output}") | |
except subprocess.CalledProcessError as e: | |
print(f"ffmpeg error: {e}") | |
exit(1) |
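Each generated frame is fed back as the input for the next edit, so small differences introduced by the model compound from iteration to iteration; the resulting video shows the image gradually drifting away from the original.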