Last active
November 23, 2022 09:08
-
-
Save asqd/5adeb37564cb2ff4ff8d890df01afa1a to your computer and use it in GitHub Desktop.
Python script to slice a large MP3 file into chunks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import path | |
from pydub import AudioSegment | |
import sys | |
import os

# pydub uses ffmpeg for file manipulation, so don't forget to install ffmpeg.
# sys.path only affects Python module imports; it is not consulted when an
# external program is looked up. Append the directory that holds the ffmpeg
# binary to PATH instead, keeping whatever PATH already contains.
os.environ["PATH"] = os.pathsep.join(
    filter(None, [os.environ.get("PATH", ""), "/usr/local/bin"])
)
def load_songs(fn):
    """Parse a track-list schema file into a list of song dicts.

    The file mixes two kinds of lines:

    * metadata lines such as ``Artist: Aerosmith`` -- stored under the
      lower-cased key and copied into every song line that follows them;
    * song lines, which start with a digit, such as ``03:03 - Title`` --
      each one becomes a new entry in the result.

    Every song receives a ``track`` number (1-based, as a string) and, except
    for the last one, a ``time_end`` equal to the next song's ``time_start``.
    Blank lines and lines that are neither a song nor ``key: value`` are
    skipped.

    Args:
        fn: Path of the schema file; it is handed to ``open_file``.

    Returns:
        A list of song dicts in file order.
    """
    meta = {}
    songs = []
    # The context manager closes the schema file even if parsing fails.
    with open_file(fn) as schema_file:
        for raw_line in schema_file:
            # Strip first so indented song lines are still recognised.
            line = raw_line.strip()
            if not line:
                continue
            if line[0].isdigit():
                song = parse_song(line)
                song.update(meta)
                song['track'] = str(len(songs) + 1)
                if songs:
                    # A song ends where the next one starts.
                    songs[-1]['time_end'] = song['time_start']
                songs.append(song)
                continue
            parts = parse_meta(line)
            if len(parts) != 2:
                # No "key: value" separator on this line; nothing to record.
                continue
            # Drop the blank that usually follows the colon so tags are clean.
            key, value = (part.strip() for part in parts)
            if value:
                meta[key.lower()] = value
    return songs
def open_file(fn, encoding="utf-8-sig"):
    """Open the schema file *fn* for reading as text.

    Args:
        fn: Path of the file to open.
        encoding: Text encoding used to decode the file. The default,
            ``utf-8-sig``, reads UTF-8 with or without a byte-order mark, so
            non-ASCII titles decode the same way on every platform and a
            leading BOM does not end up inside the first key.

    Returns:
        The open text file object; the caller is responsible for closing it.
    """
    return open(fn, encoding=encoding)
def parse_song(line):
    """Turn a ``<time> - <title>`` line into a song dict.

    The line is split on the first ``-`` only, so hyphens inside the title
    are kept. Both halves are stripped of surrounding whitespace.

    Returns:
        A dict with the keys ``time_start`` (the time text, unchanged) and
        ``title``.

    Raises:
        ValueError: If the line contains no ``-``.
    """
    pieces = line.split('-', 1)
    # Unpacking fails with ValueError when there is no separator.
    start, name = (piece.strip() for piece in pieces)
    return dict(time_start=start, title=name)
def parse_meta(line):
    """Split a ``key: value`` metadata line into its two parts.

    Only the first ``:`` separates key from value, so colons inside the value
    are preserved. Both parts are stripped of surrounding whitespace.

    Example: ``'Artist: Aerosmith'`` -> ``['Artist', 'Aerosmith']``

    Args:
        line: One metadata line from the schema file.

    Returns:
        A two-element list ``[key, value]``. When the line has no ``:`` the
        value is an empty string, so callers can always unpack two items.
    """
    key, _, value = line.partition(':')
    return [key.strip(), value.strip()]
def time_to_ms(time):
    """Convert a time value to milliseconds.

    Args:
        time: Either an int, which is returned unchanged, or a string in
            ``SS``, ``MM:SS`` or ``HH:MM:SS`` form.

    Returns:
        The time as an integer number of milliseconds.

    Raises:
        ValueError: If the string has more than three ``:``-separated fields
            or a field is not an integer.
    """
    if isinstance(time, int):
        return time
    fields = time.split(':')
    if len(fields) > 3:
        raise ValueError("expected at most HH:MM:SS, got %r" % (time,))
    # Match units from the right so the last field is always seconds,
    # whether one, two or three fields were given.
    units = (1000, 60000, 3600000)
    return sum(int(field) * unit for field, unit in zip(reversed(fields), units))
def extract_song(source, song_data, output_path):
    """Cut one song out of *source* and export it as an mp3 file.

    Args:
        source: The full recording. It is sliced with millisecond offsets and
            the slice is exported, so it is expected to behave like the
            pydub ``AudioSegment`` loaded by the script.
        song_data: Song dict with ``time_start`` and ``title``, plus whatever
            ``extract_tags`` and ``compose_filename`` read. A missing or
            empty ``time_end`` means "until the end of the recording".
        output_path: Directory the mp3 file is written into.
    """
    begin = song_data['time_start']
    finish = song_data.get('time_end')
    if not finish:
        # Last song: run to the end of the recording.
        finish = len(source)
    metadata = extract_tags(song_data)
    print("extracting %s \n" % song_data['title'])
    begin_ms = time_to_ms(begin)
    finish_ms = time_to_ms(finish)
    clip = source[begin_ms:finish_ms]
    target = path.join(output_path, compose_filename(song_data))
    clip.export(target, format="mp3", tags=metadata)
def extract_tags(song_data):
    """Pick the tag fields out of a song dict.

    Returns:
        A new dict holding whichever of ``title``, ``album``, ``artist``,
        ``year`` and ``track`` are present in *song_data*, in that order.
    """
    wanted = ('title', 'album', 'artist', 'year', 'track')
    found = {}
    for name in wanted:
        if name in song_data:
            found[name] = song_data[name]
    return found
def compose_filename(song_data):
    """Build the output file name ``NN - <title>.mp3`` for a song.

    The track number is left-padded with zeros to two digits, and every
    ``/`` in the result is replaced by ``|`` so the name cannot be taken
    for a path with sub-directories.
    """
    number = song_data['track'].zfill(2)
    raw_name = " - ".join([number, song_data['title']]) + ".mp3"
    return raw_name.translate(str.maketrans("/", "|"))
# Placeholders: replace with the real paths before running the script.
# fn is the large mp3 to slice; output_dir is where the songs are written.
fn = path.normpath('path_to_large_mp3')
output_dir = path.normpath('output_path')

##
# Schema looks like
#
# Artist: Panic! at the Disco
# Album: Pray for the Wicked Tour 2019 - Live at O2 Arena, London 2019
# Year: 2019
# 00:15 - (Fuck A) Silver Lining
# 03:03 - Don't Threaten Me With a Good Time
# 06:34 - Ready to Go (Get Me Out of My Mind)
# ...
##
# Placeholder: path of the text file that follows the schema above.
schema = 'schema_path'

# Only slice when run as a script, so importing this module has no side
# effects beyond defining the functions and the path settings above.
if __name__ == "__main__":
    print("reading file \n")
    load_songs_schema = load_songs(schema)
    record = AudioSegment.from_mp3(fn)
    for song_data in load_songs_schema:
        extract_song(record, song_data, output_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment