Created
April 5, 2025 19:32
-
-
Save dataserver/3f7193d22375e1023113eedc18fe82b9 to your computer and use it in GitHub Desktop.
This script splits a large MP3 file (for example, one downloaded from YouTube) into individual tracks based on timestamps provided in a tracklist text file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
MP3 Splitter CLI Tool | |
This script splits a large MP3 file into individual tracks based on timestamps provided in a tracklist text file. | |
The user can specify which tracks to ignore. The script saves the resulting split tracks to a specified output directory. | |
Usage: | |
python split.py -i <input_mp3_file> -t <tracklist_file> -o <output_directory> | |
Arguments: | |
-i, --input Path to the large MP3 input file (e.g., "large_file.mp3"). | |
-t, --tracklist Path to the tracklist text file containing timestamps and track names (e.g., "tracklist.txt"). | |
-o, --output Path to the directory where the split tracks will be saved (e.g., "output_dir"). | |
Tracklist Format: | |
The tracklist file should contain one line per track with the timestamp and track name: | |
<timestamp> <track_name> | |
For example: | |
00:00 Track Name A | |
02:36 Track Name B | |
05:05 Track Name C | |
08:02 Track Name D | |
Each timestamp is in the format MM:SS, indicating the start time of each track. | |
Example: | |
python split.py -i large_file.mp3 -t tracklist.txt -o output | |
Requirements: | |
- pydub (https://pydub.com/) | |
- ffmpeg (required by pydub for MP3 file handling) | |
""" | |
import os | |
import re | |
import argparse | |
from pathlib import Path | |
from pydub import AudioSegment | |
# Default FFmpeg binary location (a Windows path; adjust for your machine).
# setdefault() is used so that a FFMPEG_BINARY value already exported by the
# user is respected instead of being overwritten by this hard-coded path.
# NOTE(review): pydub appears to locate ffmpeg via PATH / AudioSegment.converter
# rather than this environment variable -- confirm that this setting is
# actually picked up.
os.environ.setdefault(
    "FFMPEG_BINARY",
    r"D:\python\mp3_spliter\bin\ffmpeg.exe",
)
def time_to_ms(time_str: str) -> int:
    """Convert a ``MM:SS`` or ``HH:MM:SS`` timestamp to milliseconds.

    Tracklists for recordings longer than an hour commonly switch to
    ``H:MM:SS``; both forms are accepted. Minutes are not capped at 59, so
    ``"75:30"`` is still read as 75 minutes and 30 seconds.

    Args:
        time_str: Colon-separated timestamp with two or three integer fields.

    Returns:
        The offset from the start of the recording, in milliseconds.

    Raises:
        ValueError: If the string does not have two or three fields, or a
            field is not an integer.
    """
    fields = time_str.strip().split(":")
    if not 2 <= len(fields) <= 3:
        raise ValueError(
            f"Invalid timestamp {time_str!r}: expected MM:SS or HH:MM:SS"
        )

    # Fold the fields left to right in base 60: ((h * 60) + m) * 60 + s.
    total_seconds = 0
    for field in fields:
        total_seconds = total_seconds * 60 + int(field)
    return total_seconds * 1000
def _parse_tracklist(file_path: Path, encoding: str) -> list[tuple[str, str]]:
    """Parse ``<timestamp> <track name>`` lines from *file_path* using *encoding*."""
    entries = []
    with open(file_path, "r", encoding=encoding) as file:
        for line in file:
            parts = line.strip().split(maxsplit=1)
            # Lines without both a timestamp and a name (blank lines, stray
            # words) are skipped.
            if len(parts) == 2:
                timestamp, track_name = parts
                entries.append((timestamp, track_name))
    return entries


def read_timestamps(file_path: Path) -> list[tuple[str, str]]:
    """Read ``(timestamp, track_name)`` pairs from a tracklist text file.

    The file is decoded as UTF-8 first (a leading byte-order mark is dropped
    so it cannot end up glued to the first timestamp). If that fails, the
    whole file is re-read as ISO-8859-1.

    Each attempt builds its own list, so a decode error that occurs part-way
    through a large file cannot leave already-parsed entries behind to be
    duplicated by the fallback pass.

    Args:
        file_path: Path to the tracklist file.

    Returns:
        One ``(timestamp, track_name)`` tuple per well-formed line, in file
        order. The timestamp is returned as the raw string from the file.
    """
    try:
        return _parse_tracklist(file_path, "utf-8-sig")
    except UnicodeDecodeError:
        print("UnicodeDecodeError encountered. Trying ISO-8859-1 encoding...")
        return _parse_tracklist(file_path, "ISO-8859-1")
# Characters that cannot appear in a Windows filename: the reserved
# punctuation plus the ASCII control characters (e.g. a tab embedded in a
# track name). Compiled once at import time.
_INVALID_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def sanitize_filename(track_name: str) -> str:
    """Return *track_name* with filename-unsafe characters replaced by ``_``.

    Each of ``< > : " / \\ | ? *`` and every ASCII control character is
    replaced individually, so the result has the same length as the input.

    Args:
        track_name: Track title as read from the tracklist.

    Returns:
        The title with every unsafe character replaced by an underscore.
    """
    return _INVALID_FILENAME_CHARS.sub("_", track_name)
def prompt_ignore_tracks(timestamps: list[tuple[str, str]]) -> list[int]:
    """List all tracks and ask the user which ones should be skipped.

    The tracks are printed with 1-based numbers, then a comma-separated list
    of numbers is read from standard input.

    Args:
        timestamps: ``(timestamp, track_name)`` pairs; only the names are shown.

    Returns:
        Zero-based indices into *timestamps* of the tracks to ignore, in the
        order the user typed them. Entries that are not plain decimal numbers,
        or that fall outside ``1..len(timestamps)``, are dropped.
    """
    print("Here is a list of all track names:")
    for i, (_, track_name) in enumerate(timestamps, 1):
        print(f"{i}: {track_name}")

    ignore_input = input(
        "Enter the numbers of tracks to ignore (comma separated, e.g., 2,5,8), or press Enter to skip: "
    )

    ignore_indices = []
    for token in ignore_input.split(","):
        token = token.strip()
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() rejects with ValueError.
        if not token.isdecimal():
            continue
        index = int(token) - 1
        # "0" or a number past the end does not name any listed track.
        if 0 <= index < len(timestamps):
            ignore_indices.append(index)
    return ignore_indices
def split_mp3(
    mp3_path: Path,
    timestamps: list[tuple[str, str]],
    output_folder: Path,
    ignore_indices: list[int],
):
    """Split an MP3 file into one file per track and export them.

    Each track runs from its own timestamp up to the next entry's timestamp;
    the last track runs to the end of the recording. The slice always starts
    at the current track's timestamp, so a track never contains audio from
    the preceding (or a skipped) track.

    Output files are named ``"<NN> - <sanitized track name>.mp3"`` where
    ``NN`` is the 1-based position in *timestamps*, zero-padded to two digits
    (three when there are more than 99 entries). Skipped tracks keep their
    number, so numbering of the remaining files is unaffected.

    Args:
        mp3_path: Path to the source MP3 file.
        timestamps: ``(timestamp, track_name)`` pairs in playback order.
        output_folder: Directory for the exported tracks; created if missing.
        ignore_indices: Zero-based indices into *timestamps* to skip.
    """
    audio = AudioSegment.from_mp3(mp3_path)
    output_folder.mkdir(parents=True, exist_ok=True)

    total_duration_ms = len(audio)
    track_number_padding = 2 if len(timestamps) <= 99 else 3
    skipped = set(ignore_indices)  # built once for O(1) membership tests

    for i, (timestamp, track_name) in enumerate(timestamps):
        if i in skipped:
            print(f"Skipping {track_name}.")
            continue

        start_ms = time_to_ms(timestamp)
        if i + 1 == len(timestamps):
            end_ms = total_duration_ms
        else:
            # The next entry's start marks this track's end, even when that
            # next entry is itself going to be skipped.
            end_ms = time_to_ms(timestamps[i + 1][0])

        segment = audio[start_ms:end_ms]

        track_number = str(i + 1).zfill(track_number_padding)
        sanitized_track_name = sanitize_filename(track_name)
        output_file = output_folder / f"{track_number} - {sanitized_track_name}.mp3"
        segment.export(output_file, format="mp3")
        print(f"Saved {track_number} - {sanitized_track_name}.mp3")

    print("Splitting complete.")
# Script entry point: parse the command line, validate the inputs, then split.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Split a large MP3 file into tracks based on a tracklist.")
    parser.add_argument("-i", "--input", required=True, help="Path to the large MP3 input file.")
    parser.add_argument("-t", "--tracklist", required=True, help="Path to the tracklist text file.")
    parser.add_argument("-o", "--output", required=True, help="Directory to save the split tracks.")
    args = parser.parse_args()

    mp3_path = Path(args.input)
    timestamps_file = Path(args.tracklist)
    output_folder = Path(args.output)

    # Fail fast with a usage message instead of a traceback that would
    # otherwise only appear after the user has already answered the prompt.
    if not mp3_path.is_file():
        parser.error(f"Input MP3 file not found: {mp3_path}")
    if not timestamps_file.is_file():
        parser.error(f"Tracklist file not found: {timestamps_file}")

    timestamps = read_timestamps(timestamps_file)
    if not timestamps:
        parser.error(f"No '<timestamp> <track name>' lines found in: {timestamps_file}")

    # Ask which tracks to leave out, then export the rest.
    ignore_indices = prompt_ignore_tracks(timestamps)
    split_mp3(mp3_path, timestamps, output_folder, ignore_indices)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment