Skip to content

Instantly share code, notes, and snippets.

@si3mshady
Created October 8, 2024 12:19
Show Gist options
  • Save si3mshady/84691195f9151a903f761bbbb59817c4 to your computer and use it in GitHub Desktop.
Save si3mshady/84691195f9151a903f761bbbb59817c4 to your computer and use it in GitHub Desktop.
This Python script, video_subtitle_transformer.py, extracts the audio track from a video file, translates the speech into English text with OpenAI's Whisper API (the audio translations endpoint), estimates subtitle timings from the video duration, and then generates timed subtitles and burns them directly into the video with FFmpeg.
import os
import moviepy.editor as mp
import subprocess
from openai import OpenAI
from pydub import AudioSegment
# Read the OpenAI credential from the environment and fail fast at import
# time when it is missing or empty, so no work starts without a usable key.
key = os.getenv("OPENAI_API_KEY")
if key is None or key == "":
    raise ValueError("OPENAI_API_KEY environment variable not set")

# Shared API client used by the transcription step below.
client = OpenAI(api_key=key)
def extract_audio_from_video(input_video, output_audio):
    """Write the audio track of a video to a file and return the video length.

    Args:
        input_video: Path of the video file to read.
        output_audio: Path the extracted audio is written to.

    Returns:
        The clip's ``duration`` attribute as reported by moviepy.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = mp.VideoFileClip(input_video)
    try:
        # moviepy sets ``audio`` to None for silent videos; report that
        # clearly instead of failing with an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in video: {input_video}")
        video.audio.write_audiofile(output_audio)
        print(f"Audio extracted: {os.path.exists(output_audio)}")
        return video.duration
    finally:
        # Release the reader resources held by the clip even when
        # extraction fails.
        video.close()
def transcribe_and_translate_audio(audio_file, src_lang="hi", target_lang="en"):
    """Translate the speech in a WAV file to text, chunk by chunk.

    The audio is split into time slices sized so that each slice's raw sample
    data stays at or below ``MAX_CHUNK_SIZE`` bytes. Every slice is exported
    to a temporary ``temp_chunk_<i>.wav`` file in the working directory, sent
    to the ``whisper-1`` translations endpoint, and deleted again.

    Args:
        audio_file: Path of the WAV file to process.
        src_lang: Not used by this function; kept for interface
            compatibility. It is never forwarded to the API.
        target_lang: Not used by this function; kept for interface
            compatibility. NOTE(review): the translations endpoint is
            documented as producing English output only -- confirm before
            relying on any other target language.

    Returns:
        The per-chunk responses joined with single spaces, with surrounding
        whitespace stripped. An empty string when the audio has no samples.
    """
    MAX_CHUNK_SIZE = 24 * 1024 * 1024  # 24 MB
    audio = AudioSegment.from_wav(audio_file)
    duration_ms = len(audio)
    raw_size = len(audio.raw_data)

    # Nothing to translate; also avoids dividing by a zero raw size below.
    if duration_ms == 0 or raw_size == 0:
        return ""

    # Milliseconds of audio that fit in one chunk. Clamp to at least 1 ms so
    # range() never receives a zero step.
    chunk_duration_ms = max(1, int((MAX_CHUNK_SIZE / raw_size) * duration_ms))

    responses = []
    for i, chunk_start in enumerate(range(0, duration_ms, chunk_duration_ms)):
        chunk_end = min(chunk_start + chunk_duration_ms, duration_ms)
        chunk = audio[chunk_start:chunk_end]
        chunk_file = f"temp_chunk_{i}.wav"
        try:
            # export() hands back the open output file; close it right away
            # so the handle is not left dangling until garbage collection.
            chunk.export(chunk_file, format="wav").close()
            print(f"Processing chunk {i+1}, size: {os.path.getsize(chunk_file)} bytes")
            with open(chunk_file, "rb") as audio_chunk:
                response = client.audio.translations.create(
                    model="whisper-1",
                    file=audio_chunk,
                    response_format="text"
                )
        finally:
            # Remove the temporary chunk even if export or the API call fails.
            if os.path.exists(chunk_file):
                os.remove(chunk_file)
        responses.append(response)

    return " ".join(responses).strip()
def create_timed_subtitles(translated_text, video_duration):
    """Split text into subtitle cues with timings estimated from word counts.

    Timing assumes words are spoken at a constant rate across the whole
    video. A cue is closed once it holds roughly three seconds' worth of
    words, or as soon as a word ends with a period. Words left over after
    the last closed cue are emitted as a final cue instead of being dropped.

    Args:
        translated_text: The full text to subtitle.
        video_duration: Length of the video in seconds.

    Returns:
        A list of ``(start_seconds, end_seconds, text)`` tuples in playback
        order. Empty when the text contains no words.

    Raises:
        ValueError: If there is text to time but ``video_duration`` is not
            positive.
    """
    words = translated_text.split()
    if not words:
        return []
    if video_duration <= 0:
        raise ValueError("video_duration must be positive to time subtitles")

    words_per_second = len(words) / video_duration
    max_words_per_cue = words_per_second * 3  # about three seconds of speech

    subtitles = []
    pending = []
    start_time = 0

    def flush():
        # Close the pending cue; its length is proportional to its word count.
        nonlocal pending, start_time
        end_time = start_time + (len(pending) / words_per_second)
        subtitles.append((start_time, end_time, " ".join(pending)))
        pending = []
        start_time = end_time

    for word in words:
        pending.append(word)
        if len(pending) >= max_words_per_cue or word.endswith('.'):
            flush()

    # Trailing words that never hit the threshold or a period still need a cue.
    if pending:
        flush()

    return subtitles
def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split into
    fields, so rounding carries into the next unit (59.9996 s becomes
    ``00:01:00,000`` rather than an invalid ``60.000`` seconds field).
    SubRip timestamps use a comma before the milliseconds.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def create_srt_content(subtitles):
    """Render subtitle cues as the text of an SRT file.

    Args:
        subtitles: Iterable of ``(start, end, text)`` tuples; ``start`` and
            ``end`` are passed to ``format_time``.

    Returns:
        One string holding every cue, numbered from 1, each written as an
        index line, a ``start --> end`` line, the text, and a blank line.
    """
    entries = [
        f"{number}\n{format_time(begin)} --> {format_time(finish)}\n{caption}\n\n"
        for number, (begin, finish, caption) in enumerate(subtitles, start=1)
    ]
    return "".join(entries)
def add_subtitles_to_video(input_video, srt_content, output_video):
    """Burn subtitles into a video by running ffmpeg's ``subtitles`` filter.

    The SRT text is written to ``temp_subtitles.srt`` in the working
    directory, ffmpeg is run with the audio stream copied unchanged, and the
    temporary file is removed afterwards. An ffmpeg failure is reported on
    stdout rather than raised.

    Args:
        input_video: Path of the source video.
        srt_content: Complete SRT file contents.
        output_video: Path of the video to create.
    """
    srt_file = "temp_subtitles.srt"
    try:
        with open(srt_file, "w", encoding="utf-8") as f:
            f.write(srt_content)

        command = [
            "ffmpeg",
            "-i", input_video,
            "-vf", f"subtitles={srt_file}",
            "-c:a", "copy",
            output_video
        ]
        # stdin is detached so ffmpeg cannot block waiting on an interactive
        # prompt (e.g. overwrite confirmation) that would be invisible
        # because its output is being captured.
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            stdin=subprocess.DEVNULL,
        )
        if result.returncode != 0:
            print(f"FFmpeg Error: {result.stderr}")
        else:
            print("Subtitles added successfully.")
        print(f"Output video created: {os.path.exists(output_video)}")
    finally:
        # Clean up the temporary SRT file even if ffmpeg could not be started.
        if os.path.exists(srt_file):
            os.remove(srt_file)
def process_video(input_video, output_video, src_lang="hi", target_lang="en"):
    """Run the full pipeline: extract audio, translate, time, and burn in.

    Steps, in order: extract the audio to ``temp_audio.wav``, translate it to
    text, estimate subtitle timings from the video duration, render SRT
    content, and embed the subtitles into a new video. The temporary audio
    file is removed whether or not the pipeline succeeds.

    Args:
        input_video: Path of the source video.
        output_video: Path of the subtitled video to create.
        src_lang: Forwarded to ``transcribe_and_translate_audio``.
        target_lang: Forwarded to ``transcribe_and_translate_audio``.
    """
    audio_file = "temp_audio.wav"
    try:
        print(f"Extracting audio from video: {input_video}")
        video_duration = extract_audio_from_video(input_video, audio_file)

        print("Transcribing and translating audio...")
        translated_text = transcribe_and_translate_audio(audio_file, src_lang, target_lang)

        print("Creating timed subtitles...")
        timed_subtitles = create_timed_subtitles(translated_text, video_duration)

        print("Creating SRT content...")
        srt_content = create_srt_content(timed_subtitles)

        print("Adding subtitles to video...")
        add_subtitles_to_video(input_video, srt_content, output_video)
    finally:
        # Clean up temporary files, including when a step above raised; the
        # file may not exist if extraction itself failed.
        if os.path.exists(audio_file):
            os.remove(audio_file)

    if os.path.exists(output_video):
        print(f"Process completed! Output saved to {output_video}")
    else:
        print(f"Error: Output file not created at {output_video}")
if __name__ == "__main__":
    # Script entry point: subtitle the bundled sample video, passing Hindi as
    # the source language and English as the target language.
    process_video(
        "fieldmap_hindi.mp4",
        "fieldmap_hindi_subtitled.mp4",
        src_lang="hi",
        target_lang="en",
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment