Last active
January 13, 2024 19:03
-
-
Save atdt/fc17d68434d3009db1eabb0fb53130c6 to your computer and use it in GitHub Desktop.
Convert OMSCS video lectures to MP3 files with chapter markers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Start by replacing all whitespace in the video file names with underscores (run this in bash, not fish):
rename 's/\s+/_/g' **/*.mp4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Convert every lecture directory's videos to MP3 and join them into a single
# MP3 per directory, with chapter markers taken from that directory's
# FFMETADATAFILE. Run this second, after the FFMETADATA files have been created.
set -eux

# Lecture directories are the ones named P*, excluding subtitle directories.
find . -maxdepth 1 -type d -name 'P*' ! -name '*subtitles*' -exec basename {} \; | sort -n | while IFS= read -r DIRECTORY ; do
    OUTPUT="${DIRECTORY}.mp3"
    FILE_LIST="${DIRECTORY}/file_list"
    FFMETADATAFILE="${DIRECTORY}/FFMETADATAFILE"

    # Start from an empty concat list on every run.
    rm -f "$FILE_LIST"

    # Read one path per line instead of expanding $(find ...) unquoted, so file
    # names are never word-split or glob-expanded and IFS needs no juggling.
    # ffmpeg runs with -nostdin, so it cannot swallow the loop's input.
    while IFS= read -r file ; do
        base_name=$(basename "$file" .mp4)

        # Extract the audio track and encode it as MP3 next to the video.
        ffmpeg -nostdin -i "$file" -q:a 0 -map a "$DIRECTORY/${base_name}.mp3"

        # Inside a single-quoted concat entry a literal ' must be written '\''.
        escaped_name=${base_name//\'/\'\\\'\'}
        printf "file '%s.mp3'\n" "$escaped_name" >> "$FILE_LIST"
    done < <(find "$DIRECTORY" -maxdepth 1 -name '*.mp4' | sort -V)

    # Join the per-video MP3s without re-encoding and attach the chapter
    # metadata read from the second input.
    ffmpeg -nostdin -f concat -safe 0 -i "$FILE_LIST" -i "$FFMETADATAFILE" -map_metadata 1 -codec copy "$OUTPUT"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Create FFMETADATA chapter files for lecture directories. Run this first.

For every directory in the current working directory whose name starts with
``P`` and does not contain ``subtitles``, write ``FFMETADATAFILE`` inside it.
The file holds one chapter per ``<number>_-_<title>.mp4`` video, ordered by the
numeric prefix, with chapter boundaries derived from the video durations
reported by ``ffprobe``.
"""
import contextlib
import glob
import os
import re
import subprocess

header_template = ''';FFMETADATA1
title={title}
artist=OMSCS 6200
'''

chapter_template = """
[CHAPTER]
TIMEBASE=1/1000
START={start}
END={end}
title={title}"""

# "<number>_-_<title>.mp4": group 1 is the ordering index, group 2 the title.
VIDEO_NAME = re.compile(r'(\d+)_-_(.*)\.mp4')

# Characters that must be backslash-escaped inside FFMETADATA values.
METADATA_SPECIALS = re.compile(r'([=;#\\\n])')


def escape_metadata(value: str) -> str:
    """Return *value* with FFMETADATA special characters backslash-escaped."""
    return METADATA_SPECIALS.sub(r'\\\1', value)


def lecture_title(directory: str) -> str:
    """Return the lecture title for *directory*.

    Only the final path component is used, so the title does not contain the
    absolute path; runs of spaces/underscores collapse to a single space.
    """
    name = os.path.basename(os.path.normpath(directory))
    return re.sub(r'[ _]+', ' ', name)


def parse_video_name(filename: str):
    """Split a video file name into ``(index, title)``.

    Raises:
        ValueError: if *filename* is not of the form ``<number>_-_<title>.mp4``.
    """
    match = VIDEO_NAME.match(filename)
    if match is None:
        raise ValueError(
            f"unexpected video file name {filename!r}; "
            "expected '<number>_-_<title>.mp4'"
        )
    return int(match.group(1)), match.group(2).replace('_', ' ')


def probe_duration_ms(path: str) -> int:
    """Return the duration of the media file at *path* in whole milliseconds.

    Raises:
        subprocess.CalledProcessError: if ``ffprobe`` exits with an error.
    """
    result = subprocess.run(
        ['ffprobe', '-i', path, '-show_entries', 'format=duration',
         '-v', 'quiet', '-of', 'csv=p=0'],
        check=True, capture_output=True, text=True,
    )
    # round() rather than int(): truncation can drop a millisecond when the
    # float product lands just below the intended integer.
    return round(float(result.stdout.strip()) * 1000)


def render_metadata(lecture: str, chapters) -> str:
    """Render the FFMETADATA text for one lecture.

    Args:
        lecture: Title written into the header.
        chapters: Iterable of ``(title, duration_ms)`` pairs in playback order.

    Returns:
        The header followed by one ``[CHAPTER]`` section per pair. Each
        chapter starts exactly where the previous one ends, so the markers do
        not drift further from the audio with every chapter.
    """
    parts = [header_template.format(title=escape_metadata(lecture)), '\n']
    start = 0
    for title, duration in chapters:
        end = start + duration
        parts.append(chapter_template.format(
            start=start, end=end, title=escape_metadata(title)))
        parts.append('\n')
        start = end
    return ''.join(parts)


def collect_chapters(directory: str):
    """Return ``(title, duration_ms)`` pairs for the videos in *directory*.

    Videos are ordered by their numeric file name prefix.
    """
    # glob.escape keeps glob metacharacters in the directory name literal.
    pattern = os.path.join(glob.escape(directory), '*.mp4')
    videos = []
    for path in glob.glob(pattern):
        index, title = parse_video_name(os.path.basename(path))
        videos.append((index, title, path))
    videos.sort(key=lambda video: video[0])
    return [(title, probe_duration_ms(path)) for _, title, path in videos]


def write_metadata(directory: str) -> None:
    """Write ``FFMETADATAFILE`` into *directory*."""
    # Build the whole text first so a failing ffprobe call does not leave a
    # half-written metadata file behind.
    text = render_metadata(lecture_title(directory), collect_chapters(directory))
    target = os.path.join(directory, 'FFMETADATAFILE')
    with open(target, 'wt', encoding='utf-8') as metadata_file:
        metadata_file.write(text)


def main() -> None:
    """Write a metadata file for every lecture directory under the cwd."""
    base_dir = os.getcwd()
    names = sorted(
        d for d in next(os.walk(base_dir))[1]
        if d.startswith('P') and 'subtitles' not in d
    )
    for name in names:
        directory = os.path.join(base_dir, name)
        print(directory)
        write_metadata(directory)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment