|
import struct |
|
import argparse |
|
import os |
|
from dataclasses import dataclass |
|
from typing import List, Tuple, Set, Optional |
|
|
|
@dataclass
class MidiHeader:
    """The three 16-bit fields stored in a MIDI file's ``MThd`` chunk."""

    # Value of the header's format field (validate() accepts 0, 1 or 2).
    format_type: int
    # Number of track chunks the header announces.
    num_tracks: int
    # Raw time-division word exactly as read from the header.
    time_division: int
|
|
|
@dataclass
class MidiEvent:
    """One event of a track, together with its position in time.

    For meta events (``status == 0xFF``) the parser stores ``data`` as the
    meta type byte followed by the payload; for every other event ``data``
    holds only the bytes that follow the status byte. The length prefix that
    meta and SysEx events carry in the file is not kept.
    """

    # Ticks elapsed since the previous event of the same track.
    delta_time: int
    # Status byte of the event (running status already resolved by the parser).
    status: int
    # Event bytes, laid out as described in the class docstring.
    data: bytes
    # Ticks elapsed since the start of the track; defaults to 0.
    absolute_time: int = 0
|
|
|
class MidiTrack:
    """Events of a single track plus a few facts collected while parsing."""

    def __init__(self):
        """Create an empty, unnamed track."""
        self.events: List[MidiEvent] = []
        self.name: str = ""
        self.channels: Set[int] = set()
        self.duration_ticks: int = 0

    def analyze_track_content(self) -> dict:
        """Classify the track from its note-on and control-change events.

        Returns:
            A dict with four boolean entries:
            ``has_notes`` - a 0x9n event with a non-zero second data byte exists;
            ``has_cc`` - a 0xBn event exists;
            ``is_decorative`` - more than 20 0xBn events whose consecutive
            time gaps take fewer distinct values than a quarter of the gap count;
            ``is_empty`` - neither notes nor control changes were found.
        """
        note_seen = False
        controller_hits = []

        for ev in self.events:
            if 0x90 <= ev.status <= 0x9F and ev.data[1] > 0:
                note_seen = True
            elif 0xB0 <= ev.status <= 0xBF:
                controller_hits.append((ev.data[0], ev.data[1], ev.absolute_time))

        cc_seen = bool(controller_hits)

        decorative = False
        if len(controller_hits) > 20:
            # Highly repetitive spacing between controller events is taken as
            # a sign that the track is not musical content.
            gaps = [
                later[2] - earlier[2]
                for earlier, later in zip(controller_hits, controller_hits[1:])
            ]
            decorative = len(set(gaps)) < len(gaps) / 4

        return {
            'has_notes': note_seen,
            'has_cc': cc_seen,
            'is_decorative': decorative,
            'is_empty': not (note_seen or cc_seen),
        }
|
|
|
class MidiFile:
    """A Standard MIDI File loaded from disk.

    Constructing an instance reads and parses ``filename`` immediately.

    Attributes:
        filename: Path the file was read from.
        header: Parsed ``MThd`` header fields.
        tracks: One ``MidiTrack`` per ``MTrk`` chunk, in file order.
        tempo: Microseconds per quarter note from the last valid Set Tempo
            meta event met while parsing (500000 if the file has none).

    Events keep meta data as "type byte + payload" and SysEx data as the bare
    payload, i.e. without the length prefix found in the file. The writers in
    this class restore that prefix when serialising.
    """

    # Meta event type of End of Track (written to disk as FF 2F 00).
    _END_OF_TRACK = 0x2F

    def __init__(self, filename: str):
        """Read and parse ``filename``.

        Raises:
            OSError: if the file cannot be opened or read.
            ValueError: if the content is not a well-formed MIDI file.
        """
        self.filename = filename
        self.header: Optional[MidiHeader] = None
        self.tracks: List[MidiTrack] = []
        self.tempo: int = 500000
        self._parse_file()

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------

    def analyze(self, verbose: bool = False):
        """Print a summary of the file and of every track to stdout.

        Args:
            verbose: also list the events of each track that has notes or
                control changes and is not classified as decorative.
        """
        print(f"\nAnalyzing: {self.filename}")
        print(f"Format: {self.header.format_type}")
        print(f"Tracks: {self.header.num_tracks}")

        division = self.header.time_division
        if division & 0x8000:
            # Bit 15 set: SMPTE timing, not ticks per quarter note.
            fps = 256 - (division >> 8)
            print(f"Time Division: SMPTE {fps} frames per second, {division & 0xFF} ticks per frame")
        else:
            print(f"Time Division: {division} ticks per quarter note")

        bpm = 60000000 / self.tempo
        print(f"Tempo: {self.tempo} microseconds per quarter note ({bpm:.1f} BPM)")

        # default=0 keeps a file without any track from crashing max().
        max_duration = max((track.duration_ticks for track in self.tracks), default=0)
        duration_seconds = self._ticks_to_seconds(max_duration)
        minutes = int(duration_seconds // 60)
        seconds = duration_seconds % 60
        print(f"Duration: {minutes}m {seconds:.1f}s")

        for i, track in enumerate(self.tracks):
            content = track.analyze_track_content()
            if content['is_empty']:
                print(f"\nTrack {i}: Empty/Text only")
            elif content['is_decorative']:
                print(f"\nTrack {i}: Likely decorative/ASCII art")
            else:
                print(f"\nTrack {i}:")
                if track.name:
                    print(f" Name: {track.name}")
                if track.channels:
                    print(f" Channels: {sorted(track.channels)}")
                if verbose:
                    print(" Events:")
                    for event in track.events:
                        self._print_event(event)

    @staticmethod
    def _print_event(event):
        """Print the one-line verbose description of ``event``, if it has one."""
        if event.status == 0xFF:
            event_type = event.data[0]
            payload = event.data[1:]
            if event_type == 0x03:
                print(f" {event.absolute_time}: Track Name: {payload.decode('ascii', errors='ignore')}")
            elif event_type == 0x51 and len(payload) == 3:
                tempo = int.from_bytes(payload, 'big')
                if tempo:
                    print(f" {event.absolute_time}: Tempo: {tempo} ({60000000/tempo:.1f} BPM)")
                else:
                    # A zero tempo has no BPM equivalent; show the raw value only.
                    print(f" {event.absolute_time}: Tempo: {tempo}")
            else:
                print(f" {event.absolute_time}: Meta Event: {hex(event_type)}")
        elif 0x80 <= event.status <= 0xEF:
            print(f" {event.absolute_time}: MIDI Event: {hex(event.status)} Channel: {event.status & 0x0F}")

    def _ticks_to_seconds(self, ticks: int) -> float:
        """Convert a tick count to seconds using the header's time division.

        Metrical timing uses ``self.tempo`` for the whole span, so files with
        tempo changes get an approximation. SMPTE timing ignores the tempo.
        Returns 0.0 when the division does not define a usable tick length.
        """
        division = self.header.time_division
        if division & 0x8000:
            fps = 256 - (division >> 8)
            # The value 29 stands for 29.97 fps drop-frame timecode.
            frames_per_second = 29.97 if fps == 29 else float(fps)
            ticks_per_second = frames_per_second * (division & 0xFF)
            return ticks / ticks_per_second if ticks_per_second else 0.0
        if division == 0:
            return 0.0
        return (ticks * self.tempo) / (division * 1000000)

    # ------------------------------------------------------------------
    # Parsing
    # ------------------------------------------------------------------

    def _read_variable_length(self, data: bytes, offset: int) -> Tuple[int, int]:
        """Decode the variable-length quantity that starts at ``data[offset]``.

        Returns:
            ``(value, bytes_read)``.

        Raises:
            ValueError: if ``data`` ends before the quantity is complete.
        """
        value = 0
        bytes_read = 0
        while True:
            position = offset + bytes_read
            if position >= len(data):
                raise ValueError("Truncated variable-length quantity")
            byte = data[position]
            value = (value << 7) | (byte & 0x7F)
            bytes_read += 1
            if not byte & 0x80:
                break
        return value, bytes_read

    def _parse_file(self):
        """Load ``self.filename`` and fill ``header``, ``tracks`` and ``tempo``.

        Raises:
            ValueError: on a bad header, a bad track header or truncated data.
        """
        with open(self.filename, 'rb') as f:
            data = f.read()

        # 14 bytes = "MThd" + 4-byte length + the three 16-bit header fields.
        if len(data) < 14 or data[:4] != b'MThd':
            raise ValueError("Not a valid MIDI file")

        header_len = struct.unpack('>I', data[4:8])[0]
        format_type, num_tracks, time_division = struct.unpack('>HHH', data[8:14])
        self.header = MidiHeader(format_type, num_tracks, time_division)

        offset = 8 + header_len
        for i in range(num_tracks):
            if len(data) < offset + 8 or data[offset:offset + 4] != b'MTrk':
                raise ValueError(f"Invalid track header at track {i}")

            track_len = struct.unpack('>I', data[offset + 4:offset + 8])[0]
            track_data = data[offset + 8:offset + 8 + track_len]
            try:
                track = self._parse_track(track_data, i)
            except IndexError as err:
                # A byte lookup ran past the end of the chunk.
                raise ValueError(f"Truncated event data in track {i}") from err

            self.tracks.append(track)
            offset += 8 + track_len

    def _parse_track(self, track_data: bytes, index: int) -> "MidiTrack":
        """Decode the body of one ``MTrk`` chunk into a ``MidiTrack``.

        Args:
            track_data: chunk content without the 8-byte chunk header.
            index: track number, used only in error messages.
        """
        track = MidiTrack()
        pos = 0
        absolute_time = 0
        running_status = None

        while pos < len(track_data):
            delta_time, vl_bytes = self._read_variable_length(track_data, pos)
            pos += vl_bytes
            absolute_time += delta_time

            status = track_data[pos]
            pos += 1

            if status == 0xFF:
                meta_type = track_data[pos]
                pos += 1
                length, vl_bytes = self._read_variable_length(track_data, pos)
                pos += vl_bytes
                event_data = track_data[pos:pos + length]
                pos += length

                if meta_type == 0x03:
                    track.name = event_data.decode('ascii', errors='ignore')
                elif meta_type == 0x51 and len(event_data) == 3:
                    # Ignore malformed or zero tempos so later BPM maths is safe.
                    tempo = int.from_bytes(event_data, 'big')
                    if tempo > 0:
                        self.tempo = tempo

                track.events.append(
                    MidiEvent(delta_time, status, bytes([meta_type]) + event_data, absolute_time)
                )

            elif status == 0xF0 or status == 0xF7:
                length, vl_bytes = self._read_variable_length(track_data, pos)
                pos += vl_bytes
                event_data = track_data[pos:pos + length]
                pos += length
                track.events.append(MidiEvent(delta_time, status, event_data, absolute_time))

            else:
                if status & 0x80:
                    running_status = status
                else:
                    # Data byte where a status was expected: reuse the previous
                    # status and re-read this byte as the first data byte.
                    pos -= 1
                    status = running_status

                if status is None:
                    raise ValueError(f"Missing running status in track {index}")

                # 0xCn and 0xDn carry one data byte, everything else two.
                length = 2 if status < 0xC0 or status > 0xDF else 1
                event_data = track_data[pos:pos + length]
                if len(event_data) != length:
                    raise ValueError(f"Truncated event data in track {index}")
                pos += length

                if 0x80 <= status <= 0xEF:
                    track.channels.add(status & 0x0F)

                track.events.append(MidiEvent(delta_time, status, event_data, absolute_time))

        track.duration_ticks = absolute_time
        return track

    # ------------------------------------------------------------------
    # Serialisation
    # ------------------------------------------------------------------

    @staticmethod
    def _encode_variable_length(value: int) -> bytes:
        """Encode ``value`` as a variable-length quantity.

        Seven bits per byte, most significant group first; every byte except
        the last has bit 7 set.

        Raises:
            ValueError: if ``value`` is negative.
        """
        if value < 0:
            raise ValueError("Variable-length quantity cannot be negative")
        encoded = bytearray([value & 0x7F])
        value >>= 7
        while value:
            encoded.insert(0, (value & 0x7F) | 0x80)
            value >>= 7
        return bytes(encoded)

    @classmethod
    def _encode_event(cls, delta: int, status: int, data: bytes) -> bytes:
        """Serialise one event (delta time, status byte and data) for a track chunk.

        Restores the length prefix of meta and SysEx events, which the parser
        does not keep in ``MidiEvent.data``.
        """
        out = bytearray(cls._encode_variable_length(delta))
        out.append(status)
        if status == 0xFF:
            if not data:
                raise ValueError("Meta event without a type byte")
            out.append(data[0])
            out += cls._encode_variable_length(len(data) - 1)
            out += data[1:]
        elif status == 0xF0 or status == 0xF7:
            out += cls._encode_variable_length(len(data))
            out += data
        else:
            out += data
        return bytes(out)

    @classmethod
    def _is_end_of_track(cls, event) -> bool:
        """Return True when ``event`` is an End of Track meta event."""
        return event.status == 0xFF and bytes(event.data[:1]) == bytes([cls._END_OF_TRACK])

    def _write_format0_file(self, path: str, events: List[Tuple[int, int, bytes]]):
        """Write a Format 0 file holding ``events`` as its single track.

        ``events`` is a list of ``(delta_time, status, data)`` tuples.
        """
        body = b"".join(self._encode_event(delta, status, data) for delta, status, data in events)
        with open(path, 'wb') as f:
            f.write(b'MThd')
            f.write(struct.pack('>IHHH', 6, 0, 1, self.header.time_division))
            f.write(b'MTrk')
            f.write(struct.pack('>I', len(body)))
            f.write(body)

    def _merge_tracks(self, exclude_channels: Optional[List[int]] = None) -> List[Tuple[int, int, bytes]]:
        """Merge all tracks into one time-ordered ``(delta, status, data)`` list.

        Channel events on ``exclude_channels`` are dropped. The per-track End
        of Track events are replaced by a single one placed at the latest
        event time of any track, so the merged track keeps its full length.
        """
        excluded = set(exclude_channels) if exclude_channels else set()
        kept = []
        end_time = 0

        for track in self.tracks:
            for event in track.events:
                end_time = max(end_time, event.absolute_time)
                if self._is_end_of_track(event):
                    continue
                if 0x80 <= event.status <= 0xEF and (event.status & 0x0F) in excluded:
                    continue
                kept.append(event)

        # list.sort is stable, so simultaneous events keep their track order.
        kept.sort(key=lambda event: event.absolute_time)

        merged = []
        last_time = 0
        for event in kept:
            merged.append((event.absolute_time - last_time, event.status, event.data))
            last_time = event.absolute_time
        merged.append((end_time - last_time, 0xFF, bytes([self._END_OF_TRACK])))
        return merged

    def convert_to_format0(self, output_filename: str, exclude_channels: Optional[List[int]] = None):
        """Write all tracks merged into one Format 0 file.

        Nothing is written when the file is already Format 0; a notice is
        printed instead.

        Args:
            output_filename: path of the file to create.
            exclude_channels: channel numbers (0-15, as found in the low
                nibble of the status byte) whose channel events are dropped.
        """
        if self.header.format_type == 0:
            print("File is already Format 0")
            return

        self._write_format0_file(output_filename, self._merge_tracks(exclude_channels))

    def split_tracks(self, output_dir: str):
        """Write every non-empty track to its own Format 0 file in ``output_dir``.

        Files are named ``<input base name>_track<i>[_<track name>].mid``; the
        track name is reduced to alphanumerics, spaces, ``-`` and ``_``.
        """
        os.makedirs(output_dir, exist_ok=True)
        base_name = os.path.splitext(os.path.basename(self.filename))[0]

        for i, track in enumerate(self.tracks):
            if not track.events:
                continue

            output_name = f"{base_name}_track{i}"
            if track.name:
                safe_name = "".join(c for c in track.name if c.isalnum() or c in (' ', '-', '_')).rstrip()
                if safe_name:
                    output_name += f"_{safe_name}"
            output_path = os.path.join(output_dir, f"{output_name}.mid")

            events = [(event.delta_time, event.status, event.data) for event in track.events]
            if not self._is_end_of_track(track.events[-1]):
                # Close the track so the written file is complete.
                events.append((0, 0xFF, bytes([self._END_OF_TRACK])))
            self._write_format0_file(output_path, events)

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def validate(self):
        """Return a list of human-readable structural problems (empty if none)."""
        issues = []

        if self.header.format_type not in [0, 1, 2]:
            issues.append(f"Invalid format type: {self.header.format_type}")

        if self.header.num_tracks != len(self.tracks):
            issues.append(f"Track count mismatch: header claims {self.header.num_tracks}, found {len(self.tracks)}")

        for i, track in enumerate(self.tracks):
            if not track.events:
                issues.append(f"Track {i} is empty")

            if not any(self._is_end_of_track(event) for event in track.events):
                issues.append(f"Track {i} missing end of track event")

        return issues
|
|
|
def process_directory(directory: str, args):
    """Run ``process_file`` on every ``.mid`` file found under ``directory``.

    The whole tree is walked. A failure on one file is reported on stdout and
    does not stop the remaining files from being processed.

    Args:
        directory: root of the tree to scan.
        args: parsed command-line options, handed to ``process_file`` as is.
    """
    for current_dir, _subdirs, entries in os.walk(directory):
        midi_paths = (
            os.path.join(current_dir, entry)
            for entry in entries
            if entry.lower().endswith('.mid')
        )
        for input_file in midi_paths:
            try:
                process_file(input_file, args)
            except Exception as e:
                print(f"Error processing {input_file}: {str(e)}")
|
|
|
def process_file(input_file: str, args):
    """Analyze one MIDI file and optionally validate, split and convert it.

    Args:
        input_file: path of the MIDI file to load.
        args: parsed command-line options; the attributes read here are
            ``validate``, ``verbose``, ``split``, ``no_convert`` and
            ``exclude_channels``.

    Raises:
        ValueError: if an entry of ``args.exclude_channels`` is not an integer.
        Any exception raised by ``MidiFile`` while reading or writing files
        propagates to the caller.
    """
    midi = MidiFile(input_file)

    if args.validate:
        issues = midi.validate()
        if issues:
            print(f"\nValidation issues in {input_file}:")
            for issue in issues:
                print(f" - {issue}")
        else:
            print(f"\n{input_file} passed validation")

    midi.analyze(args.verbose)

    if args.split:
        split_dir = os.path.splitext(input_file)[0] + "_tracks"
        midi.split_tracks(split_dir)
        print(f"\nSplit tracks saved to: {split_dir}")

    if not args.no_convert:
        output_file = os.path.splitext(input_file)[0] + '.smf'
        exclude_channels = None
        if args.exclude_channels:
            # Skip blank entries so "1,2," or "1, ,2" do not crash int().
            exclude_channels = [
                int(part) for part in args.exclude_channels.split(',') if part.strip()
            ]

        # convert_to_format0 writes nothing for a Format 0 input, so only
        # announce an output file when one was actually produced.
        already_format0 = midi.header.format_type == 0
        midi.convert_to_format0(output_file, exclude_channels)
        if not already_format0:
            print(f"\nConverted file saved as: {output_file}")
|
|
|
if __name__ == "__main__":
    # Command-line entry point: INPUT is either one MIDI file or a directory.
    parser = argparse.ArgumentParser(description='MIDI Format Converter and Analyzer')
    parser.add_argument('input', help='Input MIDI file or directory')
    parser.add_argument('-r', '--recursive', action='store_true', help='Process directory recursively')
    parser.add_argument('-s', '--split', action='store_true', help='Split tracks into separate files')
    parser.add_argument('-v', '--verbose', action='store_true', help='Show detailed event information')
    parser.add_argument('--validate', action='store_true', help='Validate MIDI file structure')
    parser.add_argument('--no-convert', action='store_true', help='Skip Format 0 conversion')
    parser.add_argument('--exclude-channels', help='Comma-separated list of channels to exclude')

    args = parser.parse_args()

    if os.path.isdir(args.input):
        if args.recursive:
            process_directory(args.input, args)
        else:
            # Top-level files only. Errors are reported per file, the same way
            # process_directory does, so one bad file does not end the batch.
            for file in os.listdir(args.input):
                if file.lower().endswith('.mid'):
                    input_file = os.path.join(args.input, file)
                    try:
                        process_file(input_file, args)
                    except Exception as e:
                        print(f"Error processing {input_file}: {str(e)}")
    else:
        # A single explicit file: let any error surface to the user.
        process_file(args.input, args)