Skip to content

Instantly share code, notes, and snippets.

@Frank-Buss
Created May 4, 2025 20:28
Show Gist options
  • Save Frank-Buss/fcbedac2d6afe86fa71266d419db10d5 to your computer and use it in GitHub Desktop.
Save Frank-Buss/fcbedac2d6afe86fa71266d419db10d5 to your computer and use it in GitHub Desktop.
Repeatedly applies the same edit prompt to an image — each output becomes the input of the next iteration — and then assembles the resulting frames into an MP4 video.
#!/usr/bin/env python3
import os
import time
import argparse
import shutil
import requests
import subprocess
import io
import traceback
import base64
from pathlib import Path
from dotenv import load_dotenv
import openai
from openai import OpenAI
from PIL import Image
# Load environment variables (a .env file is supported) and initialize the API client.
load_dotenv()
API_KEY = os.getenv("OPENAI_API_KEY")
# Fail fast with a clear message *before* constructing the client:
# OpenAI() raises an opaque OpenAIError when no API key is available,
# which would otherwise crash the script before the friendly check runs.
if not API_KEY:
    print("Error: OPENAI_API_KEY not set in environment or .env file.")
    exit(1)
client = OpenAI(api_key=API_KEY)
# ---- Command-line interface ----------------------------------------------
arg_parser = argparse.ArgumentParser(
    description="Use images.edit(...) with no mask, by making entire image transparent."
)
arg_parser.add_argument("image", help="Path to the input image (any format).")
arg_parser.add_argument(
    "--prompt",
    default="Recreate the image as closely to original as possible, without changing anything.",
    help="Edit prompt for each iteration.",
)
arg_parser.add_argument(
    "--iterations",
    type=int,
    default=5,
    help="How many frames to generate.",
)
arg_parser.add_argument(
    "--fps",
    type=int,
    default=2,
    help="Frames per second for final video.",
)
arg_parser.add_argument(
    "--output",
    default="output_video.mp4",
    help="Output video filename.",
)
arg_parser.add_argument(
    "--model",
    default="gpt-image-1",
    help="Name/ID of the image-edit model to use. You can use gpt-image-1, dall-e-2, or dall-e-3.",
)
arg_parser.add_argument(
    "--size",
    default="1024x1024",
    help="Size for each generated image (e.g. '1024x1024').",
)
args = arg_parser.parse_args()

# Abort with a readable error when no API key was found.
if not API_KEY:
    print("Error: OPENAI_OPENAI_KEY not set in environment or .env file.".replace("OPENAI_OPENAI_KEY", "OPENAI_API_KEY"))
    exit(1)
# ---- Prepare the frames directory and the fully-transparent base frame ----
# A fresh directory guarantees ffmpeg only sees this run's frames.
frames_dir = Path("output_frames")
if frames_dir.exists():
    shutil.rmtree(frames_dir)
frames_dir.mkdir()

base_name = Path(args.image).stem
transparent_png = frames_dir / f"{base_name}_transparent.png"

# Convert the user's image to RGBA with every pixel fully transparent.
# images.edit() treats transparent regions as editable, so a fully
# transparent image lets the model regenerate the whole picture without
# a separate mask file.
try:
    with Image.open(args.image) as src:
        img = src.convert("RGBA")
    # Pad width/height to even numbers — yuv420p (used later by ffmpeg)
    # requires even dimensions.
    w, h = img.size
    w += w % 2
    h += h % 2
    if (w, h) != img.size:
        img = img.resize((w, h), Image.LANCZOS)
    # One C-level call instead of a Python per-pixel loop: sets the
    # alpha channel to 0 (fully transparent) everywhere.
    img.putalpha(0)
    img.save(transparent_png, "PNG")
except Exception as e:
    print(f"Failed converting to RGBA transparent: {e}")
    exit(1)

# Seed the sequence: frame 0 is the transparent original.
first_frame = frames_dir / "frame_0000.png"
shutil.copy2(transparent_png, first_frame)
current_frame = first_frame
def _save_even_frame(image_bytes, dest):
    """Decode raw image bytes, pad dimensions to even values (required by
    yuv420p encoding later), and save the result as a PNG at *dest*."""
    with Image.open(io.BytesIO(image_bytes)) as img:
        w, h = img.size
        w += w % 2
        h += h % 2
        if (w, h) != img.size:
            img = img.resize((w, h), Image.LANCZOS)
        img.save(dest, "PNG")


# ---- Iteratively edit: each generated frame becomes the next input --------
for i in range(1, args.iterations + 1):
    next_frame = frames_dir / f"frame_{i:04d}.png"
    print(f"Generating frame {i}/{args.iterations} ...")
    try:
        with open(current_frame, "rb") as base_file:
            response = client.images.edit(
                model=args.model,
                image=base_file,
                prompt=args.prompt,
                n=1,
                size=args.size,
            )
        if not response or not getattr(response, "data", None):
            print("Error: API response is missing data")
            exit(1)
        item = response.data[0]
        # Prefer inline base64 payload (gpt-image-1); fall back to a URL
        # (dall-e-2/3 may return hosted images instead).
        if getattr(item, "b64_json", None):
            _save_even_frame(base64.b64decode(item.b64_json), next_frame)
        elif getattr(item, "url", None):
            # Explicit timeout so a stalled download cannot hang the run.
            r = requests.get(item.url, timeout=120)
            r.raise_for_status()
            _save_even_frame(r.content, next_frame)
        else:
            print("Error: No usable image data in response")
            exit(1)
    except Exception as e:
        print(f"Error: Failed to generate frame: {e}")
        exit(1)
    # The new frame seeds the next iteration.
    current_frame = next_frame
    time.sleep(1)  # Small delay to avoid rate-limiting
# ---- Assemble the frames into an MP4 with ffmpeg --------------------------
# Read the first frame to get the target dimensions (padded to even values,
# as required by the yuv420p pixel format).
try:
    with Image.open(first_frame) as im0:
        w, h = im0.size
    w += w % 2
    h += h % 2
except Exception as e:
    print(f"Error: Failed to read the first frame to determine dimensions: {e}")
    exit(1)

# Argument-list form (shell=False) avoids any shell quoting issues.
cmd = [
    "ffmpeg",
    "-y",                                   # overwrite existing output
    "-framerate", str(args.fps),            # input rate: one frame per edit
    "-i", f"{frames_dir}/frame_%04d.png",
    "-vf", f"scale={w}:{h},fps=24",         # normalize size, output at 24 fps
    "-c:v", "libx264",
    "-profile:v", "main",
    "-preset", "medium",
    "-crf", "23",
    "-pix_fmt", "yuv420p",                  # broadest player compatibility
    "-g", "1",                              # every frame a keyframe
    "-keyint_min", "1",
    "-sc_threshold", "0",
    args.output,
]
print("Running ffmpeg:", " ".join(cmd))
try:
    subprocess.run(cmd, check=True)
    print(f"Video created: {args.output}")
except FileNotFoundError:
    # subprocess raises FileNotFoundError (not CalledProcessError) when the
    # executable itself is missing — give a clear message instead of a traceback.
    print("ffmpeg error: ffmpeg executable not found on PATH")
    exit(1)
except subprocess.CalledProcessError as e:
    print(f"ffmpeg error: {e}")
    exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment