Skip to content

Instantly share code, notes, and snippets.

@sebdi
Last active December 30, 2025 20:19
Show Gist options
  • Select an option

  • Save sebdi/fede0f15c6e47737dc48089720d9ee36 to your computer and use it in GitHub Desktop.

Select an option

Save sebdi/fede0f15c6e47737dc48089720d9ee36 to your computer and use it in GitHub Desktop.
Python script to create a YouTube video from a slide deck
import os
import json
import requests
from pdf2image import convert_from_path
from pydub import AudioSegment
from pydub.utils import make_chunks
import subprocess
import time
# Configuration
PDF_PATH = 'slide.pdf'  # Slide deck to convert (one page per slide)
AUDIO_JSON_PATH = 'audio.json'  # Narration scripts, read as {"slides": [{"script": ...}, ...]}
OUTPUT_DIR = 'output'
IMAGES_DIR = os.path.join(OUTPUT_DIR, 'images')
AUDIO_DIR = os.path.join(OUTPUT_DIR, 'audio')
# Output path WITHOUT extension: '_<index>.mp4' and '.mp4' are appended later.
VIDEO_OUTPUT = os.path.join(OUTPUT_DIR, 'presentation')
FFMPEG_PATH = 'ffmpeg'  # Ensure ffmpeg is in your PATH

# Eleven Labs credentials. Values are taken from the environment when the
# variables are set, so the secret does not have to live in the script; the
# placeholders remain as a fallback for anyone who edits them in place.
ELEVENLABS_API_KEY = os.environ.get('ELEVENLABS_API_KEY', 'put your key here')
ELEVENLABS_VOICE_ID = os.environ.get('ELEVENLABS_VOICE_ID', 'put your voice id here')

# Global variable to specify slides to exclude from conversion
EXCLUDED_SLIDES = [3, 5, 15, 22, 27]  # 1-based slide numbers to exclude

# Ensure output directories exist
os.makedirs(IMAGES_DIR, exist_ok=True)
os.makedirs(AUDIO_DIR, exist_ok=True)
def convert_pdf_to_images(pdf_path, images_dir, dpi=600, excluded_slides=None):
    """Render the pages of a PDF to PNG files, skipping excluded slides.

    Args:
        pdf_path: Path of the PDF slide deck.
        images_dir: Directory that receives the ``slide_<n>.png`` files.
        dpi: Rendering resolution handed to ``convert_from_path``. All pages
            are rendered before any is saved, so lowering this reduces the
            amount of image data held at once.
        excluded_slides: 1-based slide numbers to skip. ``None`` (the
            default) uses the module-level ``EXCLUDED_SLIDES``.

    Returns:
        Paths of the saved images in slide order. File names keep the
        original slide number, so skipped slides leave gaps in the numbering.
    """
    print("Converting PDF to images...")
    if excluded_slides is None:
        excluded_slides = EXCLUDED_SLIDES
    skipped = frozenset(excluded_slides)

    pages = convert_from_path(pdf_path, dpi=dpi)
    image_paths = []
    for slide_number, page in enumerate(pages, start=1):
        if slide_number in skipped:
            print(f"Skipping slide {slide_number}")
            continue
        image_path = os.path.join(images_dir, f'slide_{slide_number}.png')
        page.save(image_path, 'PNG')
        image_paths.append(image_path)
        print(f"Saved {image_path}")
    return image_paths
def read_audio_scripts(json_path):
    """Load the narration scripts from a UTF-8 encoded JSON file.

    The parsed document must have a ``'slides'`` key whose value supports
    ``len()``; the number of entries is reported on stdout.

    Returns:
        The parsed JSON document, unchanged.
    """
    print("Reading audio scripts from JSON...")
    with open(json_path, 'r', encoding='utf-8') as handle:
        payload = json.loads(handle.read())
    slide_count = len(payload['slides'])
    print(f"Found {slide_count} audio scripts.")
    return payload
def generate_audio(script, output_path, timeout=120):
    """Synthesize speech for ``script`` via the Eleven Labs API and save it.

    Args:
        script: Text to convert to speech.
        output_path: File that receives the raw response body.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the whole
            pipeline indefinitely.

    Raises:
        RuntimeError: If the API answers with a status other than 200.
        requests.exceptions.RequestException: On connection errors/timeouts.
    """
    print(f"Generating audio for: {output_path}")
    print(f"Script: {script}")
    url = "https://api.elevenlabs.io/v1/text-to-speech/" + ELEVENLABS_VOICE_ID
    headers = {
        "Content-Type": "application/json",
        "xi-api-key": ELEVENLABS_API_KEY,
    }
    data = {
        "text": script,
        "voice_settings": {
            "stability": 0.75,
            "similarity_boost": 0.75,
        },
    }
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to generate audio: {response.status_code} - {response.text}")
        # RuntimeError is an Exception subclass, so existing handlers still match.
        raise RuntimeError(
            f"Audio generation failed (HTTP {response.status_code})."
        )
    # Only write the file on success so a failed call never leaves a bogus
    # MP3 behind that a later run would treat as already generated.
    with open(output_path, 'wb') as f:
        f.write(response.content)
    print(f"Audio saved to {output_path}")
def generate_all_audios(scripts, audio_dir):
    """Make sure an ``audio_<n>.mp3`` file exists for every slide script.

    Entries of ``scripts['slides']`` are numbered from 1 in list order. A
    file that already exists is reused; otherwise the entry's ``'script'``
    text is sent to ``generate_audio``.

    Returns:
        The audio paths, one per entry, in list order.
    """
    audio_paths = []
    for number, slide in enumerate(scripts['slides'], start=1):
        target = os.path.join(audio_dir, f'audio_{number}.mp3')
        if os.path.exists(target):
            print(f"Audio file {target} already exists. Skipping generation.")
        else:
            narration = slide['script']
            print(f"Script is {narration}")
            generate_audio(narration, target)
            # Pause between API calls to stay within rate limits; adjust
            # the delay as per the API guidelines.
            time.sleep(1)
        audio_paths.append(target)
    return audio_paths
def generate_audio_paths(audio_dir):
    """Return the paths of the ``audio_<n>.mp3`` files present in a directory.

    Only entries named ``audio_<digits>.mp3`` are considered, so unrelated
    files (editor backups, ``.DS_Store``, the concatenated track, ...) no
    longer inflate the count and produce paths to files that do not exist.

    Args:
        audio_dir: Directory to scan.

    Returns:
        Paths of the matching files, sorted by their numeric index (so
        ``audio_10.mp3`` comes after ``audio_2.mp3``).
    """
    prefix, suffix = 'audio_', '.mp3'
    numbered = []
    for name in os.listdir(audio_dir):
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        index = name[len(prefix):-len(suffix)]
        if index.isdecimal():
            numbered.append((int(index), name))
    numbered.sort()
    return [os.path.join(audio_dir, name) for _, name in numbered]
def get_audio_duration(audio_path):
    """Return the duration of an audio file in seconds, as a float.

    The file is loaded with ``AudioSegment.from_file``; the segment's
    ``len()`` is divided by 1000 to convert it to seconds.
    """
    milliseconds = len(AudioSegment.from_file(audio_path))
    return milliseconds / 1000.0
def concatenate_audio(audio_paths):
    """Join the given audio files, in order, into a single MP3.

    The result is written to ``concatenated_audio.mp3`` inside
    ``OUTPUT_DIR``. Nothing is returned; the output path is printed.
    """
    destination = os.path.join(OUTPUT_DIR, 'concatenated_audio.mp3')
    # Start from an empty segment and append each clip in turn.
    merged = AudioSegment.empty()
    for path in audio_paths:
        merged = merged + AudioSegment.from_file(path)
    merged.export(destination, format='mp3')
    print(f"Concatenated audio saved to {destination}")
def create_video_with_audio(image_paths, audio_paths, video_output, ffmpeg_path, index):
    """Encode one still image plus its audio into ``<video_output>_<index>.mp4``.

    Args:
        image_paths: List of slide image paths.
        audio_paths: List of audio paths, paired with images by position.
        video_output: Output path prefix (no extension).
        ffmpeg_path: FFmpeg executable to run.
        index: Position of the image/audio pair to encode.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    print("Creating video with FFmpeg...")
    # Pick the image/audio pair at the requested position.
    image_path, audio_path = image_paths[index], audio_paths[index]
    print(f"Image: {image_path}")
    print(f"Audio: {audio_path}")

    # The clip lasts exactly as long as its narration.
    duration = get_audio_duration(audio_path)
    print(f"Audio duration: {duration} seconds")

    segment_path = video_output + '_' + str(index) + '.mp4'
    input_args = [
        '-y',                # overwrite an existing output file
        '-loop', '1',        # repeat the single image as a video stream
        '-i', image_path,
        '-i', audio_path,
    ]
    encode_args = [
        '-c:v', 'libx264',
        '-t', str(duration),
        '-pix_fmt', 'yuv420p',
        '-c:a', 'aac',
        '-shortest',
    ]
    subprocess.run([ffmpeg_path, *input_args, *encode_args, segment_path], check=True)
    print(f"Video created at {segment_path}")
def concatenate_videos(video_output, ffmpeg_path):
    """Concatenate the clips listed in ``video_list.txt`` into one MP4.

    The list file is read from the current working directory and the
    streams are copied without re-encoding (``-c copy``).

    Args:
        video_output: Output path without extension; ``.mp4`` is appended.
        ffmpeg_path: FFmpeg executable to run.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    print("Concatenating videos with FFmpeg...")
    final_path = video_output + '.mp4'
    # Build FFmpeg command
    cmd = [
        ffmpeg_path,
        '-y',
        '-f', 'concat',
        '-safe', '0',  # the list file contains absolute paths
        '-i', 'video_list.txt',
        '-c', 'copy',
        final_path,
    ]
    # Run the FFmpeg command
    subprocess.run(cmd, check=True)
    # Report the file that was actually written (including the extension).
    print(f"Final video created at {final_path}")
def create_video(image_paths, audio_paths, video_output, ffmpeg_path):
    """Build one clip per image/audio pair and join them into a final video.

    Images and audio files are paired by position. If the lists differ in
    length, only the first ``min(len(image_paths), len(audio_paths))`` pairs
    are used. The clip list is written to ``video_list.txt`` in the current
    working directory, which is where ``concatenate_videos`` reads it.

    Args:
        image_paths: Slide image paths.
        audio_paths: Audio paths, in the same order as the images.
        video_output: Output path without extension.
        ffmpeg_path: FFmpeg executable to run.
    """
    # Determine the number of videos to create
    num_videos = min(len(image_paths), len(audio_paths))
    video_files = []
    # Create individual videos with audio
    for idx in range(num_videos):
        create_video_with_audio(image_paths, audio_paths, video_output, ffmpeg_path, idx)
        video_files.append(f"{video_output}_{idx}.mp4")
    # Generate a file listing all video segments
    with open('video_list.txt', 'w') as list_file:
        for filename in video_files:
            # Inside a single-quoted entry a literal quote must be written as
            # '\'' (close quote, escaped quote, reopen), otherwise a path such
            # as /home/o'brien/... corrupts the list file.
            escaped = os.path.abspath(filename).replace("'", "'\\''")
            list_file.write(f"file '{escaped}'\n")
    # Concatenate all the videos into one
    concatenate_videos(video_output, ffmpeg_path)
def create_video_without_audio(image_paths, video_output, ffmpeg_path, seconds_per_image=10):
    """Create a silent slideshow video showing each image for a fixed time.

    Args:
        image_paths: Slide image paths, in display order. Must not be empty.
        video_output: Output file path, passed to FFmpeg as given (no
            extension is appended here).
        ffmpeg_path: FFmpeg executable to run.
        seconds_per_image: How long each image is shown, in seconds.

    Raises:
        IndexError: If ``image_paths`` is empty.
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    print("Creating video with FFmpeg without audio...")

    def _entry(path):
        # Inside a single-quoted entry a literal quote must be written as
        # '\'' so that paths containing an apostrophe stay valid.
        escaped = os.path.abspath(path).replace("'", "'\\''")
        return f"file '{escaped}'\n"

    # Create a temporary file listing the images and the duration of each
    list_file = os.path.join(OUTPUT_DIR, 'image_list.txt')
    with open(list_file, 'w') as f:
        for img_path in image_paths:
            f.write(_entry(img_path))
            f.write(f"duration {seconds_per_image}\n")
        # Repeat the last image to ensure correct duration
        f.write(_entry(image_paths[-1]))
    # Build FFmpeg command
    cmd = [
        ffmpeg_path,
        '-y',
        '-f', 'concat',
        '-safe', '0',  # the list file contains absolute paths
        '-i', list_file,
        '-c:v', 'libx264',
        '-r', '30',
        '-pix_fmt', 'yuv420p',
        video_output,
    ]
    # Run FFmpeg command
    print("Running FFmpeg command...")
    subprocess.run(cmd, check=True)
    print(f"Video created at {video_output}")
def main():
    """Run the pipeline: PDF -> images, scripts -> audio, then the video."""
    # Step 1: Convert PDF slides to images
    image_paths = convert_pdf_to_images(PDF_PATH, IMAGES_DIR)
    # Step 2: Read audio scripts
    scripts = read_audio_scripts(AUDIO_JSON_PATH)
    # Images and scripts are paired by position and the video step silently
    # drops whatever has no partner, so make a mismatch visible up front.
    # This is a warning only: the pipeline still runs as before.
    script_count = len(scripts['slides'])
    if script_count != len(image_paths):
        print(
            f"Warning: {len(image_paths)} slide images but {script_count} "
            "audio scripts; only the first "
            f"{min(len(image_paths), script_count)} pairs will be used."
        )
    # Step 3: Generate audio files using Eleven Labs API.
    # To reuse audio already on disk without touching the scripts, call
    # generate_audio_paths(AUDIO_DIR) here instead.
    audio_paths = generate_all_audios(scripts, AUDIO_DIR)
    # Step 4: Create video using FFmpeg
    create_video(image_paths, audio_paths, VIDEO_OUTPUT, FFMPEG_PATH)
    print("All done!")
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment