Skip to content

Instantly share code, notes, and snippets.

@si3mshady
Created October 8, 2024 15:21
Show Gist options
  • Save si3mshady/8926d9063288c0bb7722cc03626bf8a3 to your computer and use it in GitHub Desktop.
Save si3mshady/8926d9063288c0bb7722cc03626bf8a3 to your computer and use it in GitHub Desktop.
This Python script uses AI to translate videos from one language to another, including both audio and subtitles

Video Translator

This Python script uses AI to translate videos from one language to another, including both audio and subtitles.

Features

  • Extracts audio from input video
  • Transcribes and translates audio using OpenAI's Whisper model
  • Generates timed subtitles in the target language
  • Creates Text-to-Speech (TTS) audio in the target language
  • Combines original video with translated audio and subtitles

Requirements

  • Python 3.7+
  • OpenAI API key
  • FFmpeg
  • Required Python libraries: moviepy, ffmpeg-python, openai, pydub

Setup

  1. Clone this repository
  2. Install required libraries: pip install moviepy ffmpeg-python openai pydub
  3. Set your OpenAI API key as an environment variable: export OPENAI_API_KEY='your-api-key-here'

Usage

  1. Place your input video in the same directory as the script
  2. Run the script: python video_translator.py
  3. The translated video will be saved under the output_video name set at the bottom of the script ('fieldmap_hindi_tts.mp4' by default)

Customization

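  • Modify the src_lang and target_lang parameters in the process_video function call to change the source and target languages. Note: the transcription step in this version accepts these parameters but does not pass them to the translation request, so changing them alone has no effect on the output language.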
  • Adjust the input and output video file names as needed

Note: This script requires implementation of several helper functions. Refer to the full version of the script for complete functionality.

import os
import moviepy.editor as mp
import ffmpeg
from openai import OpenAI
from pydub import AudioSegment
# Set OpenAI API Key
# Read the key from the OPENAI_API_KEY environment variable, as the setup
# instructions describe, rather than hard-coding a secret in the source.
# The "sk-" placeholder is kept as a fallback so the module-level names
# `key` and `client` are still defined when the variable is unset.
key = os.environ.get("OPENAI_API_KEY", "sk-")
client = OpenAI(api_key=key)
def extract_audio_from_video(input_video, output_audio):
    """Write the audio track of *input_video* to *output_audio*.

    Args:
        input_video: Path of the video file to read.
        output_audio: Path the extracted audio is written to.

    Returns:
        The duration of the video clip, as reported by moviepy.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = mp.VideoFileClip(input_video)
    try:
        # moviepy exposes a missing audio track as ``None``; fail with a clear
        # message instead of an AttributeError on ``None.write_audiofile``.
        if video.audio is None:
            raise ValueError(f"No audio track found in video: {input_video}")
        video.audio.write_audiofile(output_audio)
        print(f"Audio extracted: {os.path.exists(output_audio)}")
        return video.duration
    finally:
        # Release the reader resources held by the clip even when extraction fails.
        video.close()
def transcribe_and_translate_audio(audio_file, src_lang="hi", target_lang="en"):
    """Translate the speech in a WAV file to text, one size-limited chunk at a time.

    The audio is cut into consecutive chunks of at most ``MAX_CHUNK_SIZE`` bytes
    of raw audio data, each chunk is exported to a temporary WAV file and sent to
    the ``whisper-1`` translations endpoint, and the returned texts are joined
    with single spaces.

    Args:
        audio_file: Path of the WAV file to process.
        src_lang: Accepted for interface compatibility; not used by this function.
        target_lang: Accepted for interface compatibility; not used by this
            function. NOTE(review): per the OpenAI API reference the translations
            endpoint produces English text — confirm before relying on other
            target languages.

    Returns:
        The concatenated translated text with surrounding whitespace stripped,
        or an empty string when the audio file contains no samples.
    """
    MAX_CHUNK_SIZE = 24 * 1024 * 1024  # 24 MB
    audio = AudioSegment.from_wav(audio_file)
    duration_ms = len(audio)
    raw_size = len(audio.raw_data)
    # Empty audio would otherwise divide by zero when sizing the chunks.
    if duration_ms == 0 or raw_size == 0:
        return ""

    # Milliseconds of audio that fit in MAX_CHUNK_SIZE bytes; at least 1 so the
    # range() step below can never be zero.
    chunk_duration_ms = max(1, int((MAX_CHUNK_SIZE / raw_size) * duration_ms))

    translated_parts = []
    for i, chunk_start in enumerate(range(0, duration_ms, chunk_duration_ms)):
        chunk_end = min(chunk_start + chunk_duration_ms, duration_ms)
        chunk = audio[chunk_start:chunk_end]
        chunk_file = f"temp_chunk_{i}.wav"
        try:
            # export() returns the open output file handle; close it so the
            # handle is not leaked (and the file can be removed on every OS).
            chunk.export(chunk_file, format="wav").close()
            print(f"Processing chunk {i+1}, size: {os.path.getsize(chunk_file)} bytes")
            with open(chunk_file, "rb") as audio_chunk:
                response = client.audio.translations.create(
                    model="whisper-1",
                    file=audio_chunk,
                    response_format="text"
                )
            translated_parts.append(response)
        finally:
            # Remove the temporary chunk even if the export or API call failed.
            if os.path.exists(chunk_file):
                os.remove(chunk_file)
    return " ".join(translated_parts).strip()
def create_timed_subtitles(translated_text, video_duration):
    """Split text into subtitle cues spread evenly across the video duration.

    Timing assumes a constant speaking rate: every word is allotted
    ``video_duration / total_words`` seconds. A cue is closed once it holds
    about three seconds' worth of words, or when a word ends a sentence
    (``.``, ``!`` or ``?``). Words left over after the last boundary are
    emitted as a final cue instead of being dropped.

    Args:
        translated_text: The full text to subtitle.
        video_duration: Length of the video in seconds; must be positive when
            the text is non-empty.

    Returns:
        A list of ``(start_time, end_time, text)`` tuples in seconds, in order.
        Empty when *translated_text* contains no words.

    Raises:
        ValueError: If there are words to place but *video_duration* is not
            positive.
    """
    words = translated_text.split()
    if not words:
        return []
    if video_duration <= 0:
        raise ValueError("video_duration must be positive to time subtitles")

    words_per_second = len(words) / video_duration
    words_per_cue = words_per_second * 3  # roughly three seconds of speech
    sentence_endings = ('.', '!', '?')

    subtitles = []
    pending = []
    start_time = 0
    for word in words:
        pending.append(word)
        if len(pending) >= words_per_cue or word.endswith(sentence_endings):
            end_time = start_time + (len(pending) / words_per_second)
            subtitles.append((start_time, end_time, " ".join(pending)))
            pending = []
            start_time = end_time

    # Flush the trailing words that never reached a cue boundary.
    if pending:
        end_time = start_time + (len(pending) / words_per_second)
        subtitles.append((start_time, end_time, " ".join(pending)))
    return subtitles
def format_time(seconds):
    """Format a time offset in seconds as an SRT timecode ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split into
    fields, so rounding can never produce an out-of-range field such as
    ``60.000`` seconds. The milliseconds are separated by a comma, as the
    SubRip format specifies. Negative inputs are clamped to zero.

    Args:
        seconds: Offset from the start of the video, in seconds.

    Returns:
        The timecode string, e.g. ``"00:01:00,000"``.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder_ms = divmod(total_ms, 3600 * 1000)
    minutes, remainder_ms = divmod(remainder_ms, 60 * 1000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{millis:03d}"
def create_srt_content(subtitles):
    """Render subtitle cues as the text of an SRT file.

    Args:
        subtitles: Iterable of ``(start, end, text)`` tuples; ``start`` and
            ``end`` are passed to ``format_time``.

    Returns:
        One string holding, for each cue, its 1-based index, a
        ``start --> end`` timing line, the cue text, and a blank line.
    """
    entries = []
    for index, (begin, finish, caption) in enumerate(subtitles, start=1):
        timing_line = f"{format_time(begin)} --> {format_time(finish)}"
        entries.append(f"{index}\n{timing_line}\n{caption}\n\n")
    return "".join(entries)
def _split_text_for_tts(text, limit):
    """Split *text* on whitespace into pieces of at most *limit* characters."""
    pieces = []
    current = ""
    for word in text.split():
        # A single over-long token cannot be kept whole; cut it hard.
        while len(word) > limit:
            if current:
                pieces.append(current)
                current = ""
            pieces.append(word[:limit])
            word = word[limit:]
        candidate = f"{current} {word}" if current else word
        if len(candidate) > limit:
            pieces.append(current)
            current = word
        else:
            current = candidate
    if current:
        pieces.append(current)
    return pieces


def generate_tts_audio(translated_text, output_audio_file):
    """Synthesize speech for *translated_text* and save it to *output_audio_file*.

    Uses the ``tts-1`` model with the ``alloy`` voice. Text that fits in one
    request is sent as-is. Longer text is split at word boundaries, each piece
    is synthesized to a temporary MP3 file, and the pieces are concatenated
    with pydub into the output file.

    Args:
        translated_text: The text to speak.
        output_audio_file: Path the resulting audio is written to.
    """
    # The speech endpoint documents a 4096-character maximum for ``input``;
    # a longer single request is rejected by the API.
    max_input_chars = 4096

    if len(translated_text) <= max_input_chars:
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=translated_text
        )
        response.stream_to_file(output_audio_file)
    else:
        combined = AudioSegment.empty()
        pieces = _split_text_for_tts(translated_text, max_input_chars)
        for index, piece in enumerate(pieces):
            part_file = f"{output_audio_file}.part{index}.mp3"
            try:
                response = client.audio.speech.create(
                    model="tts-1",
                    voice="alloy",
                    input=piece
                )
                response.stream_to_file(part_file)
                combined += AudioSegment.from_file(part_file, format="mp3")
            finally:
                # Never leave partial audio files behind, even on failure.
                if os.path.exists(part_file):
                    os.remove(part_file)
        # export() returns the open output handle; close it explicitly.
        combined.export(output_audio_file, format="mp3").close()
    print(f"TTS audio generated: {output_audio_file}")
def add_subtitles_and_replace_audio(input_video, srt_content, new_audio_file, output_video):
    """Burn subtitles into the video and pair it with a new audio track.

    The SRT text is written to a temporary file, applied to the input video
    with the ``subtitles`` filter, concatenated with the new audio input
    (one video stream, one audio stream) and written to *output_video*,
    overwriting any existing file. FFmpeg failures are printed rather than
    raised. The temporary SRT file is removed in every case.

    Args:
        input_video: Path of the source video.
        srt_content: Complete SRT text to render onto the video.
        new_audio_file: Path of the audio file to use as the soundtrack.
        output_video: Path of the video file to create.
    """
    srt_file = "temp_subtitles.srt"
    with open(srt_file, "w", encoding="utf-8") as handle:
        handle.write(srt_content)

    try:
        subtitled_video = ffmpeg.input(input_video).filter('subtitles', srt_file)
        replacement_audio = ffmpeg.input(new_audio_file)
        merged = ffmpeg.concat(subtitled_video, replacement_audio, v=1, a=1)
        job = merged.output(output_video).overwrite_output()
        job.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        print(f"\nFFmpeg Error: {e.stderr.decode()}")
    else:
        print("\nSubtitles added and audio replaced successfully.")
        print(f"Output video created: {os.path.exists(output_video)}")
    finally:
        os.remove(srt_file)
def process_video(input_video, output_video, src_lang="hi", target_lang="en"):
    """Run the full pipeline: extract audio, translate, subtitle, dub, and mux.

    Steps, in order: extract the audio track to a temporary WAV file, translate
    it to text, build timed subtitles and SRT content from that text, generate
    TTS audio for the text, then write *output_video* with the subtitles and
    the TTS audio. The temporary audio files are removed when the function
    exits, whether the pipeline succeeded or raised.

    Args:
        input_video: Path of the video to translate.
        output_video: Path of the translated video to create.
        src_lang: Source language code, forwarded to the transcription step.
        target_lang: Target language code, forwarded to the transcription step.
    """
    audio_file = "temp_audio.wav"
    tts_audio_file = "temp_tts_audio.mp3"
    try:
        print(f"Extracting audio from video: {input_video}")
        video_duration = extract_audio_from_video(input_video, audio_file)
        print("Transcribing and translating audio...")
        translated_text = transcribe_and_translate_audio(audio_file, src_lang, target_lang)
        print("Creating timed subtitles...")
        timed_subtitles = create_timed_subtitles(translated_text, video_duration)
        print("Creating SRT content...")
        srt_content = create_srt_content(timed_subtitles)
        print("Generating TTS audio...")
        generate_tts_audio(translated_text, tts_audio_file)
        print("Adding subtitles and replacing audio in video...")
        add_subtitles_and_replace_audio(input_video, srt_content, tts_audio_file, output_video)
    finally:
        # Clean up temporary files. A failed step may not have created them,
        # so check first to avoid masking the original error.
        for temp_path in (audio_file, tts_audio_file):
            if os.path.exists(temp_path):
                os.remove(temp_path)
    if os.path.exists(output_video):
        print(f"Process completed! Output saved to {output_video}")
    else:
        print(f"Error: Output file not created at {output_video}")
# Script entry point: translate the sample video with the default language pair.
if __name__ == "__main__":
    input_video, output_video = "fieldmap_hindi.mp4", "fieldmap_hindi_tts.mp4"
    process_video(
        input_video,
        output_video,
        src_lang="hi",
        target_lang="en",
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment