Created
September 8, 2023 12:23
-
-
Save obskyr/36696b57596a19d37c244eb44d654b0e to your computer and use it in GitHub Desktop.
Convert chapters exported with “Export Labels…” in Audacity to an FFmpeg-compatible metadata text file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Turn an Audacity "Label Track.txt" into an FFmpeg metadata file.""" | |
import os | |
import re | |
import sys | |
# Matches one line of an Audacity label track: "<start> <end> <title>", with
# whitespace-separated fields and both times given as non-negative decimal
# seconds. The decimal point is escaped so it can only match a literal ".";
# an unescaped "." could swallow the field separator and glue two fields
# together. The title may be missing entirely (a label without text), in
# which case the "title" group is the empty string.
LABEL_LINE_RE = re.compile(
    r"^(?P<start>[0-9]+(?:\.[0-9]+)?)\s+"
    r"(?P<end>[0-9]+(?:\.[0-9]+)?)(?:\s+|$)"
    r"(?P<title>.*)$"
)
def to_ffmetadata(labels_s):
    """Convert the text of an Audacity label track to FFmpeg metadata.

    Each non-blank line of ``labels_s`` is matched against LABEL_LINE_RE to
    obtain a start time, an end time (both in seconds) and a title. The
    chapters are sorted by start time and written as ``[CHAPTER]`` sections
    with a millisecond timebase. Titles are passed through ``escape()``.

    Returns the complete metadata file contents as a string.

    Raises ValueError if a line cannot be parsed as a label.
    """
    chapters = []
    for raw_line in labels_s.splitlines():
        # Only leading whitespace is removed before matching: a label with
        # an empty title ends in a bare separator, and stripping that too
        # would make an otherwise valid line unparseable.
        line = raw_line.lstrip()
        if not line.strip():
            continue
        if line.startswith("\\"):
            # Audacity follows a label that has a frequency range with an
            # extra line beginning with a backslash; it carries no chapter
            # information, so it is skipped rather than treated as an error.
            continue
        m = LABEL_LINE_RE.match(line)
        if m is None:
            raise ValueError("Malformed label track data.")
        start = float(m.group('start'))
        end = float(m.group('end'))
        title = escape(m.group('title').strip())
        chapters.append((start, end, title))
    chapters.sort(key=lambda chapter: chapter[0])

    parts = [";FFMETADATA1\n"]

    # This is according to https://dbojan.github.io/howto_pc/media,%20How%20to%20add%20chapter%20marks%20to%20audio%20books,%20using%20opus%20codec.htm,
    # but it seems that it doesn't actually work. Judging by a recent FFmpeg
    # issue, https://trac.ffmpeg.org/ticket/7532, Ogg might use the regular
    # [CHAPTER] tags in a future version... perhaps? So this may be a dead end.
    # # For Ogg files.
    # parts.append("\n")
    # for i, (start, end, title) in enumerate(chapters):
    #     parts.append(f"CHAPTER{i:03}={to_timestamp(start)}\nCHAPTER{i:03}NAME={title}\n")

    # For MPEG-4 files. START/END are integers in units of TIMEBASE (1 ms).
    for start, end, title in chapters:
        start_ms = round(start * 1000)
        end_ms = round(end * 1000)
        parts.append(
            f"\n[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_ms}\nEND={end_ms}\ntitle={title}\n"
        )
    return "".join(parts)
# Characters that must be backslash-escaped inside an FFmpeg metadata value:
# "=", ";", "#", the backslash itself, and newline.
ESCAPE_RE = re.compile(r"[=;#\\\n]")


def escape(s):
    """Return ``s`` with FFmpeg-metadata special characters backslash-escaped.

    Every character matched by ESCAPE_RE is prefixed with a single backslash;
    all other characters are left untouched.
    """
    # A function is used as the replacement (instead of a template string) so
    # that no second layer of backslash interpretation is involved: a
    # non-raw "\\\g<0>" template emits the literal text "\g<0>" rather than
    # the matched character.
    return ESCAPE_RE.sub(lambda m: "\\" + m.group(0), s)
def to_timestamp(seconds):
    """Format a duration in seconds as ``H:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* being split into
    fields, so that rounding carries over correctly (59.9996 s becomes
    ``0:01:00.000`` instead of ``0:00:60.000``).
    """
    total_ms = round(seconds * 1000)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours}:{minutes:02}:{secs:02}.{millis:03}"
def main(*argv):
    """Command-line entry point.

    ``argv[0]`` is the path to the exported label track; the optional
    ``argv[1]`` is the output path, which defaults to ``ffmetadata.txt`` in
    the same directory as the input file.

    Returns 0 on success. Returns 1 (after printing a message to stderr) if
    no input path was given, if the input cannot be read, if the label data
    is malformed, or if the output cannot be written.
    """
    script_name = os.path.basename(__file__)

    if not argv:
        print(f"Usage: {script_name} <path to label track TXT> [output path]", file=sys.stderr)
        return 1

    in_path = argv[0]
    if len(argv) > 1:
        out_path = argv[1]
    else:
        out_path = os.path.join(os.path.dirname(in_path), 'ffmetadata.txt')

    try:
        with open(in_path, 'r', encoding='utf-8') as f:
            labels = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"{script_name}: could not read \"{in_path}\": {e}", file=sys.stderr)
        return 1

    try:
        metadata = to_ffmetadata(labels)
    except ValueError as e:
        print(f"{script_name}: could not convert \"{in_path}\": {e}", file=sys.stderr)
        return 1

    try:
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(metadata)
    except OSError as e:
        print(f"{script_name}: could not write \"{out_path}\": {e}", file=sys.stderr)
        return 1

    print(f"Successfully wrote chapters from \"{in_path}\" to \"{out_path}\"!")
    return 0
# Run the converter when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == '__main__':
    raise SystemExit(main(*sys.argv[1:]))
I assume this was caused by ";" in my input data.
If I escape/write ";" in my label data within Audacity as "\;" and export these labels, then your script converts these into "\\g<0>".
Then there is no error/warning. ffmpeg also does not complain.
The metadata in the MP4 container is shown as "foo\g<0> bar". So I had better find a different separator to use within the metadata.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I'm not sure if it's my label data (Audacity), the export (from Audacity), or this code block... but I get this error(?):
But I can feed it to ffmpeg:
output.mp4 was written, so let's test with vlc:
I've tested with a 6-hour recording and, I don't know, more than 40 label markers. From a first test I would say it got all label markers as chapters in the output.mp4 🤷 Thanks mate.