Forked from sebdi/gist:fede0f15c6e47737dc48089720d9ee36
Created
December 30, 2025 20:19
-
-
Save datavudeja/985587c768f7280a62223d643d1ba353 to your computer and use it in GitHub Desktop.
Python script to create a YouTube video from a slide deck
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import json | |
| import requests | |
| from pdf2image import convert_from_path | |
| from pydub import AudioSegment | |
| from pydub.utils import make_chunks | |
| import subprocess | |
| import time | |
# Configuration
PDF_PATH = 'slide.pdf'
AUDIO_JSON_PATH = 'audio.json'
OUTPUT_DIR = 'output'
IMAGES_DIR = os.path.join(OUTPUT_DIR, 'images')
AUDIO_DIR = os.path.join(OUTPUT_DIR, 'audio')
# Base path (without extension) for generated videos; the video functions
# append the per-slide suffix and '.mp4' themselves.
VIDEO_OUTPUT = os.path.join(OUTPUT_DIR, 'presentation')
FFMPEG_PATH = 'ffmpeg'  # Ensure ffmpeg is in your PATH
# The API key is read from the ELEVENLABS_API_KEY environment variable so the
# secret does not have to be written into this file. The placeholder is kept as
# the fallback, so editing it in place still works as before.
ELEVENLABS_API_KEY = os.environ.get('ELEVENLABS_API_KEY', 'put your key here')
ELEVENLABS_VOICE_ID = 'put your voice id here'  # You can change this to a specific voice ID if desired

# Global variable to specify slides to exclude from conversion
EXCLUDED_SLIDES = [3, 5, 15, 22, 27]  # List slide numbers (1-based) to exclude

# Ensure output directories exist
os.makedirs(IMAGES_DIR, exist_ok=True)
os.makedirs(AUDIO_DIR, exist_ok=True)
def convert_pdf_to_images(pdf_path, images_dir, dpi=600, excluded_slides=None):
    """Render the pages of a PDF to PNG files, skipping excluded slides.

    Args:
        pdf_path: Path of the PDF slide deck.
        images_dir: Directory that receives the ``slide_<n>.png`` files.
        dpi: Resolution passed to ``convert_from_path``. Defaults to 600,
            the value that was previously hard-coded.
        excluded_slides: Iterable of 1-based slide numbers to skip. When
            ``None`` the module-level ``EXCLUDED_SLIDES`` list is used.

    Returns:
        The paths of the images that were written, in slide order.
    """
    if excluded_slides is None:
        excluded_slides = EXCLUDED_SLIDES
    skipped = set(excluded_slides)

    print("Converting PDF to images...")
    pages = convert_from_path(pdf_path, dpi=dpi)
    image_paths = []
    # Slide numbers are 1-based so they match the numbering in the exclusion list
    # and in the output file names.
    for slide_number, page in enumerate(pages, start=1):
        if slide_number in skipped:
            print(f"Skipping slide {slide_number}")
            continue
        image_path = os.path.join(images_dir, f'slide_{slide_number}.png')
        page.save(image_path, 'PNG')
        image_paths.append(image_path)
        print(f"Saved {image_path}")
    return image_paths
def read_audio_scripts(json_path):
    """Load the narration scripts from a UTF-8 JSON file.

    The parsed document must have a ``'slides'`` key; its length is reported
    on stdout.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The parsed JSON document, unchanged.
    """
    print("Reading audio scripts from JSON...")
    with open(json_path, 'r', encoding='utf-8') as handle:
        document = json.load(handle)
    slide_count = len(document['slides'])
    print(f"Found {slide_count} audio scripts.")
    return document
def generate_audio(script, output_path, timeout=60):
    """Request speech for ``script`` from the ElevenLabs API and save it.

    Args:
        script: Text to convert to speech.
        output_path: File that receives the raw response body.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        RuntimeError: If the API answers with a status other than 200.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    print(f"Generating audio for: {output_path}")
    print(f"Script: {script}")
    url = "https://api.elevenlabs.io/v1/text-to-speech/" + ELEVENLABS_VOICE_ID
    headers = {
        "Content-Type": "application/json",
        "xi-api-key": ELEVENLABS_API_KEY,
    }
    data = {
        "text": script,
        "voice_settings": {
            "stability": 0.75,
            "similarity_boost": 0.75,
        },
    }
    # requests has no default timeout; without one a stalled connection would
    # block the whole pipeline forever.
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to generate audio: {response.status_code} - {response.text}")
        raise RuntimeError(
            f"Audio generation failed (HTTP {response.status_code})."
        )
    with open(output_path, 'wb') as f:
        f.write(response.content)
    print(f"Audio saved to {output_path}")
def generate_all_audios(scripts, audio_dir):
    """Make sure an ``audio_<n>.mp3`` file exists for every slide script.

    Files already present in ``audio_dir`` are reused; missing ones are
    produced with ``generate_audio``, followed by a one-second pause.

    Args:
        scripts: Parsed scripts document; each entry of ``scripts['slides']``
            provides its text under the ``'script'`` key.
        audio_dir: Directory holding the audio files.

    Returns:
        The audio paths in slide order (numbering starts at 1).
    """
    collected = []
    for number, entry in enumerate(scripts['slides'], start=1):
        target = os.path.join(audio_dir, f'audio_{number}.mp3')
        if os.path.exists(target):
            print(f"Audio file {target} already exists. Skipping generation.")
        else:
            text = entry['script']
            print(f"Script is {text}")
            generate_audio(text, target)
            # Be mindful of API rate limits; adjust the pause as per API guidelines.
            time.sleep(1)
        collected.append(target)
    return collected
def generate_audio_paths(audio_dir):
    """List the existing ``audio_<n>.mp3`` files of a directory in numeric order.

    Only entries named ``audio_<digits>.mp3`` are considered, so unrelated
    files in the directory (hidden files, notes, other formats) no longer
    produce paths to audio files that do not exist.

    Args:
        audio_dir: Directory to scan.

    Returns:
        Paths of the matching files, sorted by their slide number.
    """
    numbered = []
    for name in os.listdir(audio_dir):
        stem, extension = os.path.splitext(name)
        prefix, _, number = stem.partition('_')
        if extension == '.mp3' and prefix == 'audio' and number.isdecimal():
            numbered.append((int(number), name))
    # Sort on the integer so that audio_10 comes after audio_2.
    numbered.sort()
    return [os.path.join(audio_dir, name) for _, name in numbered]
def get_audio_duration(audio_path):
    """Return the length of an audio file in seconds.

    The file is loaded with ``AudioSegment.from_file``; the segment length is
    divided by 1000 to convert it to seconds.
    """
    length = len(AudioSegment.from_file(audio_path))
    return length / 1000.0
def concatenate_audio(audio_paths):
    """Join the given audio files in order and export them as one MP3.

    The result is written to ``concatenated_audio.mp3`` inside ``OUTPUT_DIR``.

    Args:
        audio_paths: Audio files to append, in playback order.
    """
    destination = os.path.join(OUTPUT_DIR, 'concatenated_audio.mp3')
    merged = AudioSegment.empty()
    for path in audio_paths:
        merged += AudioSegment.from_file(path)
    merged.export(destination, format='mp3')
    print(f"Concatenated audio saved to {destination}")
def create_video_with_audio(image_paths, audio_paths, video_output, ffmpeg_path, index):
    """Encode one slide image plus its narration into a video segment.

    The image and audio at position ``index`` are combined into
    ``<video_output>_<index>.mp4``: the still image is looped for the
    duration of the audio and encoded with libx264 video and AAC audio.

    Args:
        image_paths: Slide image paths.
        audio_paths: Narration audio paths, parallel to ``image_paths``.
        video_output: Base output path without extension.
        ffmpeg_path: FFmpeg executable to run.
        index: Position of the image/audio pair to encode.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    print("Creating video with FFmpeg...")

    # Pick the image/audio pair that belongs to this segment.
    slide_image = image_paths[index]
    narration = audio_paths[index]
    print(f"Image: {slide_image}")
    print(f"Audio: {narration}")

    # The segment lasts exactly as long as its narration.
    seconds = get_audio_duration(narration)
    print(f"Audio duration: {seconds} seconds")

    segment_path = video_output + '_' + str(index) + '.mp4'
    command = [ffmpeg_path, '-y']
    command += ['-loop', '1', '-i', slide_image]
    command += ['-i', narration]
    command += ['-c:v', 'libx264', '-t', str(seconds), '-pix_fmt', 'yuv420p']
    command += ['-c:a', 'aac', '-shortest']
    command.append(segment_path)

    subprocess.run(command, check=True)
    print(f"Video created at {segment_path}")
def concatenate_videos(video_output, ffmpeg_path):
    """Join the segments listed in ``video_list.txt`` into one MP4.

    The list file is read from the current working directory by FFmpeg's
    concat demuxer and the streams are copied without re-encoding.

    Args:
        video_output: Base output path; ``.mp4`` is appended.
        ffmpeg_path: FFmpeg executable to run.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    print("Concatenating videos with FFmpeg...")
    final_path = video_output + '.mp4'
    # Build FFmpeg command
    cmd = [
        ffmpeg_path,
        '-y',
        '-f', 'concat',
        '-safe', '0',
        '-i', 'video_list.txt',
        '-c', 'copy',
        final_path,
    ]
    # Run the FFmpeg command
    subprocess.run(cmd, check=True)
    # Report the file that was actually written (including the extension).
    print(f"Final video created at {final_path}")
def create_video(image_paths, audio_paths, video_output, ffmpeg_path):
    """Build one video segment per slide and concatenate them.

    Images and audio files are paired by position. When the two lists differ
    in length only the common prefix is used and a warning is printed.

    Args:
        image_paths: Slide image paths.
        audio_paths: Narration audio paths, parallel to ``image_paths``.
        video_output: Base output path without extension.
        ffmpeg_path: FFmpeg executable to run.

    Raises:
        ValueError: If there is no image/audio pair to encode.
        subprocess.CalledProcessError: If an FFmpeg invocation fails.
    """
    # Determine the number of videos to create
    num_videos = min(len(image_paths), len(audio_paths))
    if num_videos == 0:
        # Fail early with a clear message instead of letting FFmpeg choke on
        # an empty concat list.
        raise ValueError("No image/audio pairs available to create a video.")
    if len(image_paths) != len(audio_paths):
        print(
            f"Warning: {len(image_paths)} images but {len(audio_paths)} audio files; "
            f"only the first {num_videos} pairs will be used."
        )

    # Create individual videos with audio
    video_files = []
    for idx in range(num_videos):
        create_video_with_audio(image_paths, audio_paths, video_output, ffmpeg_path, idx)
        video_files.append(f"{video_output}_{idx}.mp4")

    # Generate a file listing all video segments
    with open('video_list.txt', 'w', encoding='utf-8') as list_file:
        for filename in video_files:
            # Inside single quotes FFmpeg needs a literal quote written as '\''.
            quoted = os.path.abspath(filename).replace("'", "'\\''")
            list_file.write(f"file '{quoted}'\n")

    # Concatenate all the videos into one
    concatenate_videos(video_output, ffmpeg_path)
def create_video_without_audio(image_paths, video_output, ffmpeg_path, seconds_per_image=10):
    """Create a silent slideshow video that shows each image for a fixed time.

    An ``image_list.txt`` file for FFmpeg's concat demuxer is written into
    ``OUTPUT_DIR`` and then encoded with libx264 at 30 fps.

    Args:
        image_paths: Slide image paths, in display order.
        video_output: Output file path, passed to FFmpeg as given.
        ffmpeg_path: FFmpeg executable to run.
        seconds_per_image: Display time of every image. Defaults to 10, the
            value that was previously hard-coded.

    Raises:
        ValueError: If ``image_paths`` is empty.
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    if not image_paths:
        raise ValueError("No images available to create a video.")

    print("Creating video with FFmpeg without audio...")

    def quote(path):
        # Inside single quotes FFmpeg needs a literal quote written as '\''.
        return os.path.abspath(path).replace("'", "'\\''")

    # Create a temporary file listing the images and their duration
    list_file = os.path.join(OUTPUT_DIR, 'image_list.txt')
    with open(list_file, 'w', encoding='utf-8') as f:
        for img_path in image_paths:
            f.write(f"file '{quote(img_path)}'\n")
            f.write(f"duration {seconds_per_image}\n")
        # Repeat the last image to ensure correct duration
        f.write(f"file '{quote(image_paths[-1])}'\n")

    # Build FFmpeg command
    cmd = [
        ffmpeg_path,
        '-y',
        '-f', 'concat',
        '-safe', '0',
        '-i', list_file,
        '-c:v', 'libx264',
        '-r', '30',
        '-pix_fmt', 'yuv420p',
        video_output,
    ]
    # Run FFmpeg command
    print("Running FFmpeg command...")
    subprocess.run(cmd, check=True)
    print(f"Video created at {video_output}")
def main():
    """Run the whole pipeline: PDF -> images, scripts -> audio, then video."""
    # Step 1: render the slide deck to one image per (non-excluded) slide.
    slides = convert_pdf_to_images(PDF_PATH, IMAGES_DIR)

    # Step 2: load the narration scripts.
    # NOTE: the number of scripts is not checked against the number of slides.
    narration = read_audio_scripts(AUDIO_JSON_PATH)

    # Step 3: generate the audio files using the Eleven Labs API. To work from
    # audio that is already on disk, generate_audio_paths(AUDIO_DIR) can be
    # used here instead.
    clips = generate_all_audios(narration, AUDIO_DIR)

    # Step 4: create the video using FFmpeg.
    create_video(slides, clips, VIDEO_OUTPUT, FFMPEG_PATH)
    print("All done!")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment