Skip to content

Instantly share code, notes, and snippets.

@hsiboy
Last active January 24, 2025 17:21
Show Gist options
  • Save hsiboy/7b0f0fd44a9ce924c8f4db92f9c08948 to your computer and use it in GitHub Desktop.
Save hsiboy/7b0f0fd44a9ce924c8f4db92f9c08948 to your computer and use it in GitHub Desktop.
Convert MIDI files to Format 0 Standard MIDI Files (SMF), as used by many older workstations/keyboards.

MIDI Format Converter

Converts MIDI files to Format 0 (SMF) and provides analysis tools.

Features

  • Convert Format 1/2 MIDI files to Format 0 (SMF)
  • Analyze MIDI file structure and content
  • Split multi-track files into separate files
  • Process directories of MIDI files
  • Filter specific MIDI channels
  • Validate MIDI file structure
  • Detailed event logging

Installation

git clone [repository-url]
cd midi-converter

No external dependencies required. Uses Python standard library.

Usage

Basic Conversion

python midi_converter.py input.mid

Creates input.smf in Format 0.

Command Line Options

-r, --recursive        Process directory recursively
-s, --split           Split tracks into separate files
-v, --verbose         Show detailed event information
--validate            Validate MIDI file structure
--no-convert          Skip Format 0 conversion
--exclude-channels    Comma-separated list of zero-based channels (0-15) to exclude

Examples

Process single file with detailed analysis:

python midi_converter.py input.mid -v

Convert directory of MIDI files:

python midi_converter.py ./midi_dir -r

Split tracks and exclude the drum channel (MIDI channel 10, given as zero-based 9):

python midi_converter.py input.mid -s --exclude-channels 9

Validate without converting:

python midi_converter.py input.mid --validate --no-convert

Output

Analysis Output

Analyzing: input.mid
Format: 1
Tracks: 16
Time Division: 480 ticks per quarter note
Tempo: 500000 microseconds per quarter note (120.0 BPM)
Duration: 3m 45.2s

Track 0:
  Name: Piano
  Channels: [0, 1]

File Output

  • .smf - Converted Format 0 files
  • _tracks/ - Directory containing split tracks (when using -s)

Error Handling

  • Validates MIDI file headers
  • Checks for track integrity
  • Reports detailed validation issues
  • Handles running status correctly
  • Graceful error reporting for corrupt files

Technical Details

  • Supports all MIDI event types
  • Preserves tempo and time division
  • Maintains channel assignments
  • Processes meta events (track names, tempo, etc.)
  • Handles system exclusive messages

License

Unlicensed

import struct
import argparse
import os
from dataclasses import dataclass
from typing import List, Tuple, Set, Optional
@dataclass
class MidiHeader:
    """The three 16-bit fields read from a MIDI file's ``MThd`` chunk."""

    # Format word as stored in the file; validation accepts 0, 1 or 2.
    format_type: int
    # Number of track chunks the header declares.
    num_tracks: int
    # Raw division word; the analysis output reports it as ticks per
    # quarter note.
    time_division: int
@dataclass
class MidiEvent:
    """A single track event together with its timing."""

    # Ticks elapsed since the previous event in the same track.
    delta_time: int
    # Status byte (0xFF for meta events, 0xF0/0xF7 for sysex, otherwise a
    # channel message status).
    status: int
    # Event payload. For meta events the first byte is the meta type and the
    # rest is the meta payload; the length field is not stored.
    data: bytes
    # Ticks elapsed since the start of the track.
    absolute_time: int = 0
class MidiTrack:
    """Events and summary metadata collected for one track."""

    def __init__(self):
        self.events: List[MidiEvent] = []
        self.name: str = ""
        self.channels: Set[int] = set()
        self.duration_ticks: int = 0

    def analyze_track_content(self) -> dict:
        """Classify the track by the kinds of channel events it contains.

        Returns:
            A dict with boolean entries:
            ``has_notes`` - at least one note-on with non-zero velocity;
            ``has_cc`` - at least one control-change event;
            ``is_decorative`` - more than 20 control changes whose spacing
            shows very little variety (heuristic);
            ``is_empty`` - neither notes nor control changes are present.
        """
        has_notes = False
        has_cc = False
        cc_times = []
        for event in self.events:
            status = event.status
            if 0x90 <= status <= 0x9F:
                # A note-on with velocity 0 acts as a note-off, so it does not
                # count as a note. Guard the index so a truncated event
                # cannot raise IndexError.
                if len(event.data) > 1 and event.data[1] > 0:
                    has_notes = True
            elif 0xB0 <= status <= 0xBF:
                has_cc = True
                cc_times.append(event.absolute_time)

        is_decorative = False
        if has_cc and len(cc_times) > 20:
            time_gaps = [later - earlier
                         for earlier, later in zip(cc_times, cc_times[1:])]
            # Few distinct gaps relative to the number of gaps means the
            # control changes arrive on a highly regular grid.
            if len(set(time_gaps)) < len(time_gaps) / 4:
                is_decorative = True

        return {
            'has_notes': has_notes,
            'has_cc': has_cc,
            'is_decorative': is_decorative,
            'is_empty': not (has_notes or has_cc)
        }
class MidiFile:
    """A MIDI file loaded from disk, with analysis and export helpers.

    Constructing an instance reads and parses ``filename`` immediately.

    Attributes:
        filename: Path the file was read from.
        header: Parsed ``MThd`` fields.
        tracks: One ``MidiTrack`` per ``MTrk`` chunk, in file order.
        tempo: Microseconds per quarter note. Starts at 500000 and is
            overwritten by every well-formed Set Tempo meta event met while
            parsing, so it ends up holding the last one seen.
    """

    def __init__(self, filename: str):
        self.filename = filename
        self.header: Optional[MidiHeader] = None
        self.tracks: List[MidiTrack] = []
        self.tempo: int = 500000
        self._parse_file()

    def analyze(self, verbose: bool = False):
        """Print a human-readable summary of the file.

        Args:
            verbose: When true, also list meta and channel events per track.
        """
        print(f"\nAnalyzing: {self.filename}")
        print(f"Format: {self.header.format_type}")
        print(f"Tracks: {self.header.num_tracks}")
        print(f"Time Division: {self.header.time_division} ticks per quarter note")
        # Guard the divisions so a degenerate tempo or division cannot crash
        # the report.
        bpm = 60000000 / self.tempo if self.tempo else 0.0
        print(f"Tempo: {self.tempo} microseconds per quarter note ({bpm:.1f} BPM)")
        # default=0 keeps a file that declares zero tracks from raising.
        max_duration = max((track.duration_ticks for track in self.tracks), default=0)
        if self.header.time_division:
            # The duration is estimated with the single stored tempo; tempo
            # changes inside the file are not accounted for.
            duration_seconds = (max_duration * self.tempo) / (self.header.time_division * 1000000)
        else:
            duration_seconds = 0.0
        minutes = int(duration_seconds // 60)
        seconds = duration_seconds % 60
        print(f"Duration: {minutes}m {seconds:.1f}s")
        for i, track in enumerate(self.tracks):
            content = track.analyze_track_content()
            if content['is_empty']:
                print(f"\nTrack {i}: Empty/Text only")
            elif content['is_decorative']:
                print(f"\nTrack {i}: Likely decorative/ASCII art")
            else:
                print(f"\nTrack {i}:")
            if track.name:
                print(f" Name: {track.name}")
            if track.channels:
                print(f" Channels: {sorted(list(track.channels))}")
            if verbose:
                print(" Events:")
                for event in track.events:
                    self._print_event(event)

    @staticmethod
    def _print_event(event):
        """Print one line describing a meta or channel event (verbose mode)."""
        if event.status == 0xFF:
            event_type = event.data[0]
            payload = event.data[1:]
            tempo = int.from_bytes(payload, 'big') if len(payload) == 3 else 0
            if event_type == 0x03:
                print(f" {event.absolute_time}: Track Name: {payload.decode('ascii', errors='ignore')}")
            elif event_type == 0x51 and tempo:
                print(f" {event.absolute_time}: Tempo: {tempo} ({60000000/tempo:.1f} BPM)")
            else:
                # Also reached for a malformed tempo event (wrong length or
                # zero), which cannot be shown as BPM.
                print(f" {event.absolute_time}: Meta Event: {hex(event_type)}")
        elif event.status >= 0x80 and event.status <= 0xEF:
            print(f" {event.absolute_time}: MIDI Event: {hex(event.status)} Channel: {event.status & 0x0F}")

    def _read_variable_length(self, data: bytes, offset: int) -> Tuple[int, int]:
        """Decode a variable-length quantity starting at ``offset``.

        Each byte contributes its low 7 bits; a set high bit means another
        byte follows.

        Returns:
            ``(value, bytes_read)``.

        Raises:
            ValueError: If ``data`` ends before the quantity is terminated.
        """
        value = 0
        bytes_read = 0
        while True:
            if offset + bytes_read >= len(data):
                raise ValueError("Truncated variable-length quantity")
            byte = data[offset + bytes_read]
            value = (value << 7) | (byte & 0x7F)
            bytes_read += 1
            if not byte & 0x80:
                break
        return value, bytes_read

    @staticmethod
    def _encode_variable_length(value: int) -> bytes:
        """Encode a non-negative integer as a variable-length quantity."""
        encoded = bytearray([value & 0x7F])
        value >>= 7
        while value:
            # Every byte except the last carries the continuation bit.
            encoded.insert(0, (value & 0x7F) | 0x80)
            value >>= 7
        return bytes(encoded)

    @classmethod
    def _encode_event(cls, delta: int, status: int, data: bytes) -> bytes:
        """Serialise one event, restoring the length field the parser drops."""
        out = bytearray(cls._encode_variable_length(delta))
        out.append(status)
        if status == 0xFF:
            # Stored as <type><payload>; written as <type><length><payload>.
            out.extend(data[:1])
            out.extend(cls._encode_variable_length(max(len(data) - 1, 0)))
            out.extend(data[1:])
        elif status == 0xF0 or status == 0xF7:
            out.extend(cls._encode_variable_length(len(data)))
            out.extend(data)
        else:
            out.extend(data)
        return bytes(out)

    @staticmethod
    def _is_end_of_track(event) -> bool:
        """Return True when ``event`` is the End of Track meta event."""
        return event.status == 0xFF and len(event.data) > 0 and event.data[0] == 0x2F

    @staticmethod
    def _write_single_track_file(path: str, time_division: int, track_data: bytes):
        """Write a Format 0 file consisting of one track chunk."""
        with open(path, 'wb') as f:
            f.write(b'MThd')
            f.write(struct.pack('>IHHH', 6, 0, 1, time_division))
            f.write(b'MTrk')
            f.write(struct.pack('>I', len(track_data)))
            f.write(track_data)

    def _parse_file(self):
        """Read ``self.filename`` and populate ``header``, ``tracks``, ``tempo``.

        Raises:
            ValueError: If the header or a track chunk is missing or malformed.
        """
        with open(self.filename, 'rb') as f:
            data = f.read()
        if data[:4] != b'MThd':
            raise ValueError("Not a valid MIDI file")
        if len(data) < 14:
            raise ValueError("Truncated MIDI header")
        header_len = struct.unpack('>I', data[4:8])[0]
        format_type, num_tracks, time_division = struct.unpack('>HHH', data[8:14])
        self.header = MidiHeader(format_type, num_tracks, time_division)
        # Use the declared header length so longer headers are skipped.
        offset = 8 + header_len
        for i in range(num_tracks):
            if data[offset:offset+4] != b'MTrk' or len(data) < offset + 8:
                raise ValueError(f"Invalid track header at track {i}")
            track_len = struct.unpack('>I', data[offset+4:offset+8])[0]
            track_data = data[offset+8:offset+8+track_len]
            self.tracks.append(self._parse_track(track_data, i))
            offset += 8 + track_len

    def _parse_track(self, track_data: bytes, index: int) -> "MidiTrack":
        """Decode the body of one ``MTrk`` chunk into a ``MidiTrack``."""
        track = MidiTrack()
        pos = 0
        end = len(track_data)
        absolute_time = 0
        running_status = None
        while pos < end:
            delta_time, vl_bytes = self._read_variable_length(track_data, pos)
            pos += vl_bytes
            absolute_time += delta_time
            if pos >= end:
                raise ValueError(f"Truncated event in track {index}")
            status = track_data[pos]
            pos += 1
            if status == 0xFF:
                if pos >= end:
                    raise ValueError(f"Truncated meta event in track {index}")
                meta_type = track_data[pos]
                pos += 1
                length, vl_bytes = self._read_variable_length(track_data, pos)
                pos += vl_bytes
                event_data = track_data[pos:pos+length]
                if len(event_data) != length:
                    raise ValueError(f"Truncated meta event in track {index}")
                pos += length
                if meta_type == 0x03:
                    track.name = event_data.decode('ascii', errors='ignore')
                elif meta_type == 0x51 and len(event_data) == 3:
                    tempo = int.from_bytes(event_data, 'big')
                    # A zero tempo is ignored so later BPM maths stays finite.
                    if tempo:
                        self.tempo = tempo
                track.events.append(MidiEvent(delta_time, status, bytes([meta_type]) + event_data, absolute_time))
            elif status == 0xF0 or status == 0xF7:
                length, vl_bytes = self._read_variable_length(track_data, pos)
                pos += vl_bytes
                event_data = track_data[pos:pos+length]
                if len(event_data) != length:
                    raise ValueError(f"Truncated sysex event in track {index}")
                pos += length
                track.events.append(MidiEvent(delta_time, status, event_data, absolute_time))
            else:
                if status & 0x80:
                    running_status = status
                else:
                    # Not a status byte: it is the first data byte of an
                    # event that reuses the previous status.
                    pos -= 1
                    status = running_status
                    if status is None:
                        raise ValueError(f"Missing running status in track {index}")
                # Program change (0xC0-0xCF) and channel pressure (0xD0-0xDF)
                # take one data byte; everything else handled here takes two.
                length = 2 if status < 0xC0 or status > 0xDF else 1
                event_data = track_data[pos:pos+length]
                if len(event_data) != length:
                    raise ValueError(f"Truncated event in track {index}")
                pos += length
                if status >= 0x80 and status <= 0xEF:
                    track.channels.add(status & 0x0F)
                track.events.append(MidiEvent(delta_time, status, event_data, absolute_time))
        track.duration_ticks = absolute_time
        return track

    def convert_to_format0(self, output_filename: str, exclude_channels: Optional[List[int]] = None):
        """Merge every track into one and write it as a Format 0 file.

        Nothing is written when the file is already Format 0.

        Args:
            output_filename: Destination path.
            exclude_channels: Zero-based channel numbers whose channel
                messages are dropped from the output.
        """
        if self.header.format_type == 0:
            print("File is already Format 0")
            return
        excluded = set(exclude_channels or ())
        merged = []
        end_time = 0
        for track in self.tracks:
            for event in track.events:
                end_time = max(end_time, event.absolute_time)
                if self._is_end_of_track(event):
                    # Per-track terminators must not land in the middle of
                    # the merged track; one is appended at the very end.
                    continue
                if excluded and event.status >= 0x80 and event.status <= 0xEF:
                    if (event.status & 0x0F) in excluded:
                        continue
                merged.append(event)
        # list.sort is stable, so simultaneous events keep track order.
        merged.sort(key=lambda event: event.absolute_time)

        track_data = bytearray()
        last_time = 0
        for event in merged:
            track_data.extend(self._encode_event(event.absolute_time - last_time, event.status, event.data))
            last_time = event.absolute_time
        track_data.extend(self._encode_event(end_time - last_time, 0xFF, b'\x2f'))
        self._write_single_track_file(output_filename, self.header.time_division, bytes(track_data))

    def split_tracks(self, output_dir: str):
        """Write each non-empty track to its own single-track file.

        Files are named ``<input base name>_track<N>[_<track name>].mid`` and
        placed in ``output_dir``, which is created if necessary.
        """
        os.makedirs(output_dir, exist_ok=True)
        base_name = os.path.splitext(os.path.basename(self.filename))[0]
        for i, track in enumerate(self.tracks):
            if not track.events:
                continue
            output_name = f"{base_name}_track{i}"
            if track.name:
                # Keep only characters that are safe in a file name.
                safe_name = "".join(c for c in track.name if c.isalnum() or c in (' ', '-', '_')).rstrip()
                if safe_name:
                    output_name += f"_{safe_name}"
            output_path = os.path.join(output_dir, f"{output_name}.mid")
            track_data = bytearray()
            for event in track.events:
                track_data.extend(self._encode_event(event.delta_time, event.status, event.data))
            if not self._is_end_of_track(track.events[-1]):
                # Terminate tracks whose source chunk lacked End of Track.
                track_data.extend(self._encode_event(0, 0xFF, b'\x2f'))
            self._write_single_track_file(output_path, self.header.time_division, bytes(track_data))

    def validate(self) -> List[str]:
        """Return a list of human-readable structural problems (empty if none)."""
        issues = []
        if self.header.format_type not in [0, 1, 2]:
            issues.append(f"Invalid format type: {self.header.format_type}")
        if self.header.num_tracks != len(self.tracks):
            issues.append(f"Track count mismatch: header claims {self.header.num_tracks}, found {len(self.tracks)}")
        for i, track in enumerate(self.tracks):
            if not track.events:
                issues.append(f"Track {i} is empty")
            if not any(self._is_end_of_track(event) for event in track.events):
                issues.append(f"Track {i} missing end of track event")
        return issues
def process_directory(directory: str, args):
    """Process every ``.mid`` file found under ``directory``, recursively.

    A failure in one file is reported and does not stop the walk.

    Args:
        directory: Root directory to walk.
        args: Parsed command-line options, passed through to ``process_file``.
    """
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith('.mid'):
                continue
            input_file = os.path.join(root, file)
            try:
                process_file(input_file, args)
            except Exception as e:
                # Batch boundary: report the failure and move on to the next file.
                print(f"Error processing {input_file}: {e}")
def process_file(input_file: str, args):
    """Run the requested validation, analysis, split and conversion steps.

    Args:
        input_file: Path of the MIDI file to process.
        args: Parsed command-line options; reads ``validate``, ``verbose``,
            ``split``, ``no_convert`` and ``exclude_channels``.
    """
    midi = MidiFile(input_file)
    if args.validate:
        issues = midi.validate()
        if issues:
            print(f"\nValidation issues in {input_file}:")
            for issue in issues:
                print(f" - {issue}")
        else:
            print(f"\n{input_file} passed validation")
    midi.analyze(args.verbose)
    if args.split:
        split_dir = os.path.splitext(input_file)[0] + "_tracks"
        midi.split_tracks(split_dir)
        print(f"\nSplit tracks saved to: {split_dir}")
    if not args.no_convert:
        output_file = os.path.splitext(input_file)[0] + '.smf'
        exclude_channels = None
        if args.exclude_channels:
            # Skip empty segments so input such as "9," does not raise.
            exclude_channels = [int(c) for c in args.exclude_channels.split(',') if c.strip()]
        # convert_to_format0 writes nothing for a file that is already
        # Format 0, so only announce an output file when one was produced.
        already_format0 = midi.header.format_type == 0
        midi.convert_to_format0(output_file, exclude_channels)
        if not already_format0:
            print(f"\nConverted file saved as: {output_file}")
if __name__ == "__main__":
    # Command-line entry point: parse options, then process a single file or
    # the .mid files of a directory.
    parser = argparse.ArgumentParser(description='MIDI Format Converter and Analyzer')
    parser.add_argument('input', help='Input MIDI file or directory')
    parser.add_argument('-r', '--recursive', action='store_true', help='Process directory recursively')
    parser.add_argument('-s', '--split', action='store_true', help='Split tracks into separate files')
    parser.add_argument('-v', '--verbose', action='store_true', help='Show detailed event information')
    parser.add_argument('--validate', action='store_true', help='Validate MIDI file structure')
    parser.add_argument('--no-convert', action='store_true', help='Skip Format 0 conversion')
    parser.add_argument('--exclude-channels', help='Comma-separated list of channels to exclude')
    args = parser.parse_args()
    if os.path.isdir(args.input):
        if args.recursive:
            process_directory(args.input, args)
        else:
            for file in os.listdir(args.input):
                if file.lower().endswith('.mid'):
                    input_file = os.path.join(args.input, file)
                    # Match the recursive path: report a bad file and keep
                    # going instead of aborting the whole batch.
                    try:
                        process_file(input_file, args)
                    except Exception as e:
                        print(f"Error processing {input_file}: {e}")
    else:
        process_file(args.input, args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment