Skip to content

Instantly share code, notes, and snippets.

@FSund
Created May 14, 2025 08:02
Show Gist options
  • Select an option

  • Save FSund/eebf3b1ac5ab5e9fecba21b26d7b4fa9 to your computer and use it in GitHub Desktop.

Select an option

Save FSund/eebf3b1ac5ab5e9fecba21b26d7b4fa9 to your computer and use it in GitHub Desktop.
Split an MKA file with chapters into separate tracks with metadata.
#!/usr/bin/env python3
"""
Split an MKA file with chapters into separate tracks with metadata.
This script:
1. Extracts chapter information
2. Splits the file into separate MKA files
3. Adds metadata to each file
"""
import os
import sys
import subprocess
import argparse
import xml.etree.ElementTree as ET
import re
import tempfile
def run_command(command, silent=False):
    """Run an external command and return its stripped standard output.

    Args:
        command: Either a shell command line (``str``), which is run through
            the shell exactly as before, or a sequence of program arguments,
            which is run directly without a shell so that file names need
            no quoting.
        silent: When true, do not echo the command before running it.

    Returns:
        The command's standard output with surrounding whitespace removed.

    Exits the process with status 1, after printing diagnostics to stderr,
    when the command cannot be started or finishes with a non-zero exit
    code.
    """
    import shlex  # local import: only needed to display argument lists

    use_shell = isinstance(command, str)
    if use_shell:
        shown = command
    else:
        # Render the argument list the way one would type it in a shell.
        shown = " ".join(shlex.quote(str(arg)) for arg in command)

    if not silent:
        print(f"Running: {shown}")

    try:
        result = subprocess.run(
            command, shell=use_shell, text=True, capture_output=True
        )
    except OSError as exc:
        # Without a shell, a missing or non-executable program raises here
        # instead of producing a non-zero return code.
        print(f"Error executing command: {shown}", file=sys.stderr)
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        print(f"Error executing command: {shown}", file=sys.stderr)
        print(f"Error: {result.stderr}", file=sys.stderr)
        sys.exit(1)
    return result.stdout.strip()
def extract_chapters(input_file):
    """Ask mkvextract to dump the chapters of *input_file* as XML.

    Args:
        input_file: Path of the Matroska file to read chapters from.

    Returns:
        The name of the XML file passed to mkvextract as its output
        ("chapters.xml", relative to the current working directory).
    """
    import shlex  # local import: keeps this function self-contained

    xml_file = "chapters.xml"
    # The command goes through a POSIX shell, so quote the paths properly;
    # plain double quotes break on names containing ", $ or backticks.
    run_command(
        f'mkvextract {shlex.quote(str(input_file))} '
        f'chapters {shlex.quote(xml_file)}'
    )
    return xml_file
def parse_chapter_info(xml_file):
    """Read a Matroska chapter XML file and list its top-level chapters.

    Only ``ChapterAtom`` elements that are direct children of an
    ``EditionEntry`` are returned; nested sub-chapters are ignored, and a
    chapter's title is taken from its own ``ChapterDisplay`` only.
    NOTE(review): this is meant to match what ``mkvmerge --split
    chapters:all`` splits on (top-level chapters) - confirm against the
    mkvmerge documentation.

    Args:
        xml_file: Path of the chapter XML file.

    Returns:
        A list of dicts with the keys ``'start_time'`` (text of
        ``ChapterTimeStart``) and ``'name'`` (text of the first
        ``ChapterString``, or ``"Track N"`` when the chapter has no
        non-blank title).

    Exits the process with status 1 when the file cannot be read or parsed,
    or when a chapter has no ``ChapterTimeStart`` element.
    """
    try:
        root = ET.parse(xml_file).getroot()
    except (ET.ParseError, OSError) as e:
        print(f"Error parsing chapter XML: {e}", file=sys.stderr)
        sys.exit(1)

    chapters = []
    for edition in root.findall('.//EditionEntry'):
        # Direct children only: './/ChapterAtom' would also match nested atoms.
        for chapter_atom in edition.findall('ChapterAtom'):
            start_element = chapter_atom.find('ChapterTimeStart')
            if start_element is None:
                print(
                    "Error parsing chapter XML: "
                    "ChapterAtom without ChapterTimeStart",
                    file=sys.stderr,
                )
                sys.exit(1)

            # findtext returns None when the element is missing and '' when
            # it is empty; both fall back to a generated track name.
            name = chapter_atom.findtext('ChapterDisplay/ChapterString')
            if not name or not name.strip():
                name = f"Track {len(chapters) + 1}"

            chapters.append({
                'start_time': start_element.text,
                'name': name,
            })
    return chapters
def split_mka_by_chapters(input_file):
    """Split *input_file* at its chapters with mkvmerge.

    mkvmerge is run with ``--split chapters:all`` and the output pattern
    ``split_%03d.mka``; the pieces are then discovered by probing
    ``split_001.mka``, ``split_002.mka``, ... in the current working
    directory until the first missing number.

    NOTE(review): files matching the pattern that were left over from an
    earlier run are indistinguishable from fresh output here - confirm the
    working directory is clean before relying on the result.

    Args:
        input_file: Path of the Matroska file to split.

    Returns:
        The list of consecutively numbered split files that exist, in order.
    """
    import itertools  # local imports: keep this function self-contained
    import shlex

    output_pattern = "split_%03d.mka"
    # The command goes through a POSIX shell, so quote the paths properly;
    # plain double quotes break on names containing ", $ or backticks.
    run_command(
        f'mkvmerge -o {shlex.quote(output_pattern)} '
        f'--split chapters:all {shlex.quote(str(input_file))}'
    )

    # Derive the candidate names from the same pattern handed to mkvmerge
    # and stop at the first gap in the numbering.
    candidates = (output_pattern % number for number in itertools.count(1))
    return list(itertools.takewhile(os.path.exists, candidates))
def _simple_tag(name, value):
    """Return one <Simple> name/value element with the value XML-escaped."""
    from xml.sax.saxutils import escape  # local import: helper is self-contained

    return (
        ' <Simple>\n'
        f' <Name>{name}</Name>\n'
        f' <String>{escape(str(value))}</String>\n'
        ' </Simple>\n'
    )


def _build_tags_xml(album_name, artist_name, track_name, track_num,
                    total_tracks, year):
    """Return the Matroska tags XML document for one track of an album."""
    parts = [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<!DOCTYPE Tags SYSTEM "matroskatags.dtd">\n',
        '<Tags>\n',
        # Album-level tag (TargetTypeValue 50)
        ' <Tag>\n',
        ' <Targets>\n',
        ' <TargetTypeValue>50</TargetTypeValue>\n',  # ALBUM
        ' </Targets>\n',
        _simple_tag('TITLE', album_name),
        _simple_tag('ARTIST', artist_name),
        _simple_tag('TOTAL_PARTS', total_tracks),
    ]
    if year:
        parts.append(_simple_tag('DATE_RELEASED', year))
    parts += [
        ' </Tag>\n',
        # Track-level tag (TargetTypeValue 30)
        ' <Tag>\n',
        ' <Targets>\n',
        ' <TargetTypeValue>30</TargetTypeValue>\n',  # TRACK/SONG
        ' </Targets>\n',
        _simple_tag('TITLE', track_name),
        _simple_tag('ARTIST', artist_name),
        _simple_tag('PART_NUMBER', track_num),
        _simple_tag('TOTAL_PARTS', total_tracks),
        ' </Tag>\n',
        '</Tags>\n',
    ]
    return ''.join(parts)


def add_metadata_to_mka(input_files, chapters, artist_name, album_name, year):
    """Copy each split file to a named track file and tag it with mkvpropedit.

    For the file at position ``i`` the track title is ``chapters[i]['name']``;
    when there is no such chapter, or its name is empty, ``"Track N"`` is
    used instead. Each input file is copied to ``"NN. <safe title>.mka"`` in
    the current working directory, where every character of the title other
    than ASCII letters, digits and spaces is replaced by ``_``. Album-level
    tags (title, artist, total parts, optional release date) and track-level
    tags (title, artist, part number, total parts) are then applied.

    Args:
        input_files: Paths of the split files, in track order.
        chapters: List of dicts with a ``'name'`` key, in track order.
        artist_name: Artist written to both album and track level.
        album_name: Album title.
        year: Release date text; omitted from the tags when falsy.

    Returns:
        The list of output file names, in track order.
    """
    import shlex  # local import: keeps this function self-contained

    output_files = []
    total_tracks = len(input_files)
    for track_num, input_file in enumerate(input_files, start=1):
        # Fall back to a generic title instead of raising IndexError when
        # there are more files than chapters, or when the name is missing.
        track_name = None
        if track_num <= len(chapters):
            track_name = chapters[track_num - 1]['name']
        if not track_name:
            track_name = f"Track {track_num}"

        # Create a safe filename from the chapter name
        safe_name = re.sub(r'[^a-zA-Z0-9 ]', '_', track_name)
        output_file = f"{track_num:02d}. {safe_name}.mka"

        # Copy (not rename) so the split file is still there for cleanup.
        run_command(
            f'cp {shlex.quote(str(input_file))} {shlex.quote(output_file)}'
        )

        tags_xml = _build_tags_xml(
            album_name, artist_name, track_name, track_num, total_tracks, year
        )

        # The document declares UTF-8, so write it as UTF-8 regardless of
        # the locale's default encoding.
        meta_file = tempfile.NamedTemporaryFile(
            mode='w', suffix='.xml', delete=False, encoding='utf-8'
        )
        try:
            with meta_file:
                meta_file.write(tags_xml)
            # Apply metadata with mkvpropedit
            run_command(
                f'mkvpropedit {shlex.quote(output_file)} '
                f'--tags global:{shlex.quote(meta_file.name)}'
            )
        finally:
            # run_command exits the process on failure; remove the temporary
            # XML file on that path too.
            os.unlink(meta_file.name)

        output_files.append(output_file)
    return output_files
def cleanup(files_to_remove):
    """Delete the given files, ignoring any that do not exist.

    Args:
        files_to_remove: Iterable of file paths to delete.
    """
    for path in files_to_remove:
        try:
            os.unlink(path)
        except FileNotFoundError:
            # Already gone: nothing to clean up. Attempting the removal
            # directly avoids the race between an existence check and the
            # unlink call.
            pass
def main():
    """Command-line entry point: split an MKA file by chapters and tag the parts.

    Parses the arguments, extracts and lists the chapters, splits the input
    with mkvmerge, tags each part, and finally removes the chapter XML and
    the intermediate split files unless ``--keep-temp`` was given. The
    temporary files are removed even when one of the steps aborts.
    """
    parser = argparse.ArgumentParser(description='Split MKA file with chapters into separate tracks with metadata')
    parser.add_argument('input_file', help='Input MKA file with chapters')
    parser.add_argument('--artist', required=True, help='Artist name')
    parser.add_argument('--album', required=True, help='Album name')
    parser.add_argument('--year', help='Album year')
    parser.add_argument('--keep-temp', action='store_true', help='Keep temporary files', default=False)
    args = parser.parse_args()

    # Fail early with a usage error instead of a tool error further down.
    if not os.path.isfile(args.input_file):
        parser.error(f"input file not found: {args.input_file}")

    chapter_xml = None
    split_files = []
    try:
        # 1. Extract chapter information
        print(f"Processing {args.input_file}...")
        chapter_xml = extract_chapters(args.input_file)
        chapters = parse_chapter_info(chapter_xml)
        print(f"Found {len(chapters)} chapters/tracks:")
        for i, chapter in enumerate(chapters):
            print(f" {i+1}. {chapter['name']} (starts at {chapter['start_time']})")

        # 2. Split MKA file by chapters
        print("\nSplitting file by chapters...")
        split_files = split_mka_by_chapters(args.input_file)
        if not split_files:
            print("Error: splitting produced no output files", file=sys.stderr)
            sys.exit(1)
        if len(split_files) != len(chapters):
            # Titles are assigned by position, so a count mismatch means
            # some tracks may get the wrong name.
            print(
                f"Warning: got {len(split_files)} split files for "
                f"{len(chapters)} chapters; track names may not line up.",
                file=sys.stderr,
            )

        # 3. Add metadata to each MKA file
        print("\nAdding metadata to MKA files...")
        output_files = add_metadata_to_mka(split_files, chapters, args.artist, args.album, args.year)
    finally:
        # 4. Clean up temporary files (also when a step above exited early)
        if not args.keep_temp:
            print("\nCleaning up temporary files...")
            temp_files = ([chapter_xml] if chapter_xml else []) + split_files
            cleanup(temp_files)

    print("\nDone! Created the following files:")
    for file in output_files:
        print(f" {file}")


# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment