-
-
Save taka-wang/7753871167bc7502226fcfec54cbab29 to your computer and use it in GitHub Desktop.
Script that creates subtitles (closed captions) for all MP4 video files in your current directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sebastian Raschka 09/24/2022
# Create a new conda environment and install packages
# conda create -n whisper python=3.9
# conda activate whisper
# conda install mlxtend -c conda-forge
# Install ffmpeg
# macOS & homebrew
# brew install ffmpeg
# Ubuntu
# sudo apt-get install ffmpeg
# Install whisper
# from repo https://github.com/openai/whisper
# pip install git+https://github.com/openai/whisper.git
import os
import os.path as osp
import subprocess

from mlxtend.file_io import find_files
from mlxtend.utils import Counter
# Batch-generate subtitles: for every file found under the current directory
# whose name contains ".mp4", copy its audio track into ./extracted_audio
# with ffmpeg, then transcribe that audio with the whisper command-line tool,
# writing the results into ./generated_subtitles.
all_videos = find_files(substring=".mp4", path="./", recursive=True)

# Only preview a path when there is one; indexing an empty result would
# raise IndexError before the (more useful) count below is printed.
if all_videos:
    print("Example path:", all_videos[0])
print("Number of videos to process:", len(all_videos))

audio_outdir = "./extracted_audio"
subtitle_outdir = "./generated_subtitles"

for this_dir in (audio_outdir, subtitle_outdir):
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(this_dir, exist_ok=True)

cnt = Counter()

for v in all_videos:
    base = osp.splitext(v)[0]
    # NOTE: only the file name is kept, so videos in different
    # subdirectories that share a name map to the same audio file.
    aac_file_out = osp.join(audio_outdir, osp.basename(base)) + ".aac"

    # Extract the audio stream from the video without re-encoding it.
    # Arguments are passed as a list (no shell), so paths containing spaces,
    # quotes or shell metacharacters reach ffmpeg unchanged.
    # subprocess.run raises FileNotFoundError if ffmpeg is not installed.
    subprocess.run(["ffmpeg", "-i", v, "-vn", "-acodec", "copy", aac_file_out])

    if osp.isfile(aac_file_out):
        subprocess.run(
            [
                "whisper",
                aac_file_out,
                "--model",
                "medium",
                "--language",
                "English",
                "--output_dir",
                subtitle_outdir,
                "--verbose",
                "False",
            ]
        )
    else:
        # Without an audio file there is nothing for whisper to transcribe.
        print("Skipping (no audio extracted):", v)

    cnt.update()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment