Forked from rasbt/video-subtitles-via-whisper.py
Last active
October 25, 2022 16:08
-
-
Save kcarnold/b3d9ffd02d69bfdf3f2c72f12d831d1c to your computer and use it in GitHub Desktop.
Script that creates subtitles (closed captions) for all MP4 video files in your current directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sebastian Raschka 09/24/2022
# Fixed to avoid problems with spaces and other special characters in filenames, Ken Arnold 10/25/2022
#
# Create a new conda environment and install packages
# conda create -n whisper python=3.9
# conda activate whisper
# conda install mlxtend -c conda-forge
# Install ffmpeg
# macOS & homebrew
# brew install ffmpeg
# Ubuntu
# sudo apt-get install ffmpeg
# Install whisper
# from repo https://github.com/openai/whisper
# pip install git+https://github.com/openai/whisper.git
import subprocess
import os
import os.path as osp

from mlxtend.file_io import find_files
from mlxtend.utils import Counter

# Gather the videos to process by searching the current directory tree for
# ".mp4" (presumably matched against file names -- confirm against the
# mlxtend `find_files` documentation).
all_videos = find_files(substring=".mp4", path="./", recursive=True)
if not all_videos:
    # Without this guard the all_videos[0] lookup below raises IndexError.
    raise SystemExit("No .mp4 files found under the current directory.")

print("Example path:", all_videos[0])
print("Number of videos to process:", len(all_videos))

audio_outdir = "./extracted_audio"
subtitle_outdir = "./generated_subtitles"

# exist_ok=True replaces the separate "exists?" check and its race window.
for this_dir in (audio_outdir, subtitle_outdir):
    os.makedirs(this_dir, exist_ok=True)

cnt = Counter()
for v in all_videos:
    # Output audio is named after the video's base name only, so videos with
    # the same file name in different sub-directories share one output path.
    base = osp.splitext(v)[0]
    aac_file_out = osp.join(audio_outdir, osp.basename(base)) + ".aac"

    # Extract the audio track from the video without re-encoding. The "file:"
    # prefix makes ffmpeg treat the argument as a plain file path even when
    # it contains characters such as ':' that ffmpeg would otherwise parse.
    extraction = subprocess.run(
        ["ffmpeg", "-i", "file:" + v, "-vn", "-acodec", "copy",
         "file:" + aac_file_out]
    )

    if extraction.returncode == 0:
        # Transcribe the extracted audio; whisper writes its output files
        # into subtitle_outdir.
        subprocess.run(
            ["whisper", aac_file_out, "--model", "medium",
             "--language", "English",
             "--output_dir", subtitle_outdir, "--verbose", "False"]
        )
    else:
        # Do not hand whisper an audio file that ffmpeg failed to produce.
        print("ffmpeg failed for", v, "- skipping transcription")

    # Advance the progress counter once per video, successful or not.
    cnt.update()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment