This Python script, video_subtitle_transformer.py, extracts the audio track from a video file, transcribes it and translates it to English with OpenAI's Whisper API, and then generates timed subtitles and burns them directly into the video. The defaults target a Hindi-to-English workflow; a minimal usage sketch follows the script.
import os
import subprocess

import moviepy.editor as mp
from openai import OpenAI
from pydub import AudioSegment

# Set OpenAI API key
key = os.environ.get("OPENAI_API_KEY")
if not key:
    raise ValueError("OPENAI_API_KEY environment variable not set")

client = OpenAI(api_key=key)
def extract_audio_from_video(input_video, output_audio):
    video = mp.VideoFileClip(input_video)
    video.audio.write_audiofile(output_audio)
    print(f"Audio extracted: {os.path.exists(output_audio)}")
    return video.duration
def transcribe_and_translate_audio(audio_file, src_lang="hi", target_lang="en"):
    # Whisper's translations endpoint always outputs English text, so src_lang
    # and target_lang only document the caller's intent here.
    # Split the audio so each chunk stays under the API's upload size limit.
    MAX_CHUNK_SIZE = 24 * 1024 * 1024  # 24 MB, just under the 25 MB API limit

    audio = AudioSegment.from_wav(audio_file)
    duration_ms = len(audio)
    chunk_duration_ms = (MAX_CHUNK_SIZE / len(audio.raw_data)) * duration_ms

    translated_text = ""
    for i, chunk_start in enumerate(range(0, duration_ms, int(chunk_duration_ms))):
        chunk_end = min(chunk_start + int(chunk_duration_ms), duration_ms)
        chunk = audio[chunk_start:chunk_end]

        chunk_file = f"temp_chunk_{i}.wav"
        chunk.export(chunk_file, format="wav")
        print(f"Processing chunk {i + 1}, size: {os.path.getsize(chunk_file)} bytes")

        with open(chunk_file, "rb") as audio_chunk:
            response = client.audio.translations.create(
                model="whisper-1",
                file=audio_chunk,
                response_format="text",
            )
        # With response_format="text" the client returns a plain string.
        translated_text += response + " "

        os.remove(chunk_file)

    return translated_text.strip()
def create_timed_subtitles(translated_text, video_duration):
    # Spread the translated words evenly across the video's duration, starting
    # a new subtitle roughly every 3 seconds or at the end of a sentence.
    words = translated_text.split()
    total_words = len(words)
    words_per_second = total_words / video_duration

    subtitles = []
    current_subtitle = ""
    word_count = 0
    start_time = 0

    for word in words:
        current_subtitle += word + " "
        word_count += 1
        if word_count >= words_per_second * 3 or word.endswith('.'):
            end_time = start_time + (word_count / words_per_second)
            subtitles.append((start_time, end_time, current_subtitle.strip()))
            current_subtitle = ""
            word_count = 0
            start_time = end_time

    # Flush any words left over after the last sentence boundary.
    if current_subtitle.strip():
        subtitles.append((start_time, video_duration, current_subtitle.strip()))

    return subtitles
def format_time(seconds):
    # SRT timestamps use a comma as the decimal separator: HH:MM:SS,mmm
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    millis = int((seconds - int(seconds)) * 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def create_srt_content(subtitles):
    srt_content = ""
    for i, (start, end, text) in enumerate(subtitles, 1):
        srt_content += f"{i}\n{format_time(start)} --> {format_time(end)}\n{text}\n\n"
    return srt_content
def add_subtitles_to_video(input_video, srt_content, output_video):
    srt_file = "temp_subtitles.srt"
    with open(srt_file, "w", encoding="utf-8") as f:
        f.write(srt_content)

    # Burn the subtitles into the video stream; copy the audio unchanged.
    command = [
        "ffmpeg",
        "-y",  # overwrite the output file if it already exists
        "-i", input_video,
        "-vf", f"subtitles={srt_file}",
        "-c:a", "copy",
        output_video
    ]
    result = subprocess.run(command, capture_output=True, text=True)

    if result.returncode != 0:
        print(f"FFmpeg Error: {result.stderr}")
    else:
        print("Subtitles added successfully.")
        print(f"Output video created: {os.path.exists(output_video)}")

    os.remove(srt_file)
def process_video(input_video, output_video, src_lang="hi", target_lang="en"):
    audio_file = "temp_audio.wav"

    print(f"Extracting audio from video: {input_video}")
    video_duration = extract_audio_from_video(input_video, audio_file)

    print("Transcribing and translating audio...")
    translated_text = transcribe_and_translate_audio(audio_file, src_lang, target_lang)

    print("Creating timed subtitles...")
    timed_subtitles = create_timed_subtitles(translated_text, video_duration)

    print("Creating SRT content...")
    srt_content = create_srt_content(timed_subtitles)

    print("Adding subtitles to video...")
    add_subtitles_to_video(input_video, srt_content, output_video)

    # Clean up temporary files
    os.remove(audio_file)

    if os.path.exists(output_video):
        print(f"Process completed! Output saved to {output_video}")
    else:
        print(f"Error: Output file not created at {output_video}")
if __name__ == "__main__":
    input_video = "fieldmap_hindi.mp4"
    output_video = "fieldmap_hindi_subtitled.mp4"
    process_video(input_video, output_video, src_lang="hi", target_lang="en")
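Below is a minimal usage sketch rather than part of the gist itself. It assumes the script above is saved as video_subtitle_transformer.py, ffmpeg is on your PATH, OPENAI_API_KEY is exported in your environment, and "meeting_hindi.mp4" is a stand-in for your own file.

# usage_example.py -- hypothetical companion file, not part of the gist above
from video_subtitle_transformer import process_video

# Hindi speech in, English subtitles burned into a new MP4 out.
process_video(
    input_video="meeting_hindi.mp4",             # hypothetical input file
    output_video="meeting_hindi_subtitled.mp4",  # hypothetical output file
    src_lang="hi",
    target_lang="en",
)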