Skip to content

Instantly share code, notes, and snippets.

@instr3
Created August 29, 2025 09:53
Show Gist options
  • Save instr3/e2d885fee209b80dbdcdc548504eae1e to your computer and use it in GitHub Desktop.
Save instr3/e2d885fee209b80dbdcdc548504eae1e to your computer and use it in GitHub Desktop.
ugly_midi.py
import pretty_midi
import mido
import six
from typing import Union
from pretty_midi.utilities import (key_name_to_key_number, qpm_to_bpm)
from pretty_midi import TimeSignature
from pretty_midi import KeySignature, Lyric, Text
from heapq import merge
class UglyMIDI(pretty_midi.PrettyMIDI):
    """A ``pretty_midi.PrettyMIDI`` loader that tolerates some malformed files.

    Compared with the loading code it replaces, this class:

    * opens the file with ``clip=True`` so mido clips out-of-range data
      bytes instead of rejecting them;
    * treats delta times larger than ``0x7FFFFFFF`` as zero;
    * optionally moves tempo, key signature and time signature events from
      every other track into track 0 before they are parsed;
    * collects marker events into ``self.markers`` (a list of ``Text``).

    Note that ``pretty_midi.PrettyMIDI.__init__`` is deliberately not called:
    this constructor performs the whole loading sequence itself.
    """

    # Meta message types relocated to track 0 when ``fix_track0`` is set.
    # ``_load_metadata`` only reads key/time signatures from track 0; tempo
    # events are moved with them (presumably the inherited tempo loader has
    # the same track-0 restriction -- confirm against pretty_midi).
    _TRACK0_META_TYPES = ('set_tempo', 'key_signature', 'time_signature')

    def __init__(self, midi_file, constant_tempo: Union[None, float] = None,
                 verbose: bool = False, fix_track0: bool = True):
        """Load MIDI data from a file path or an open file object.

        Parameters
        ----------
        midi_file : str or file object
            A string is passed to mido as a filename; anything else is
            passed to mido as a file object.
        constant_tempo : float or None
            If given, tempo events in the file are ignored and this tempo
            (beats per minute, one beat being ``ticks_per_beat`` ticks) is
            used for the whole file.
        verbose : bool
            Currently unused.
        fix_track0 : bool
            If True, move every tempo, key signature and time signature
            event found in tracks 1..N into track 0 before parsing.

        Raises
        ------
        ValueError
            If the largest tick in the file exceeds ``pretty_midi.MAX_TICK``.
        """
        if isinstance(midi_file, six.string_types):
            # A string is interpreted as a path on disk
            midi_data = mido.MidiFile(filename=midi_file, clip=True)
        else:
            # Anything else is assumed to be a readable file object
            midi_data = mido.MidiFile(file=midi_file, clip=True)

        # Convert the per-event delta times into absolute ticks.
        for track in midi_data.tracks:
            tick = 0
            for event in track:
                if event.time > 0x7FFFFFFF:
                    # Such a delta is out of the expected range and the MIDI
                    # file format gives no guidance on how to interpret it,
                    # so the event is kept at the current position instead.
                    event.time = 0
                event.time += tick
                tick = event.time

        if fix_track0:
            meta_types = self._TRACK0_META_TYPES
            track0 = midi_data.tracks[0]
            for track in midi_data.tracks[1:]:
                moved = [e for e in track if e.type in meta_types]
                if not moved:
                    continue
                track0.extend(moved)
                # Drop all moved events in one pass. Slice assignment keeps
                # the original track object (and its type) in place.
                track[:] = [e for e in track if e.type not in meta_types]
            # Restore time order in track 0. list.sort is stable, so events
            # sharing a tick keep their relative order, which is important.
            track0.sort(key=lambda e: e.time)

        # Store the resolution for later use
        self.resolution = midi_data.ticks_per_beat

        # Populate the list of tempo changes (tick scales)
        if constant_tempo is not None:
            seconds_per_tick = 60.0 / (float(constant_tempo) * self.resolution)
            self._tick_scales = [(0, seconds_per_tick)]
        else:
            self._load_tempo_changes(midi_data)

        # Largest absolute tick over all events; ``default`` keeps this
        # well-defined when a track contains no events at all.
        max_tick = max(
            (event.time for track in midi_data.tracks for event in track),
            default=0,
        ) + 1
        # If max_tick is huge, the MIDI file is probably corrupt
        # and creating the tick-to-time array will thrash memory
        if max_tick > pretty_midi.MAX_TICK:
            raise ValueError(('MIDI file has a largest tick of {},'
                              ' it is likely corrupt'.format(max_tick)))

        # Create the array that maps ticks to time in seconds
        self._update_tick_to_time(max_tick)
        # Populate the key/time signature, lyric, text and marker lists
        self._load_metadata(midi_data)
        # Populate the list of instruments
        self._load_instruments(midi_data)

    def _load_metadata(self, midi_data):
        """Populate the metadata lists from ``midi_data``.

        Fills ``self.key_signature_changes`` (``KeySignature``) and
        ``self.time_signature_changes`` (``TimeSignature``) from track 0,
        and ``self.lyrics`` (``Lyric``), ``self.text_events`` (``Text``) and
        ``self.markers`` (``Text``) from all tracks, each ordered by time.

        Parameters
        ----------
        midi_data : mido.MidiFile
            MIDI object whose event times have already been converted to
            absolute ticks.
        """
        # The tick-to-time table is a name-mangled private attribute of the
        # base class, so it has to be reached through its mangled name.
        tick_to_time = self._PrettyMIDI__tick_to_time

        self.key_signature_changes = []
        self.time_signature_changes = []
        # Key and time signatures are only read from track 0
        for event in midi_data.tracks[0]:
            if event.type == 'key_signature':
                self.key_signature_changes.append(KeySignature(
                    key_name_to_key_number(event.key),
                    tick_to_time[event.time]))
            elif event.type == 'time_signature':
                self.time_signature_changes.append(TimeSignature(
                    event.numerator,
                    event.denominator,
                    tick_to_time[event.time]))

        # Lyrics, text events and markers are searched for on all tracks.
        # Each track contributes one list per kind.
        lyrics_by_track = []
        text_by_track = []
        markers_by_track = []
        for track in midi_data.tracks:
            lyrics = []
            text_events = []
            markers = []
            for event in track:
                if event.type == 'lyrics':
                    lyrics.append(
                        Lyric(event.text, tick_to_time[event.time]))
                elif event.type == 'text':
                    text_events.append(
                        Text(event.text, tick_to_time[event.time]))
                elif event.type == 'marker':
                    markers.append(
                        Text(event.text, tick_to_time[event.time]))
            lyrics_by_track.append(lyrics)
            text_by_track.append(text_events)
            markers_by_track.append(markers)

        def by_time(item):
            return item.time

        # Merge the per-track lists (already in time order) into one
        # time-ordered list per kind. Ties keep track order, and empty
        # per-track lists are harmless to ``merge``.
        self.lyrics = list(merge(*lyrics_by_track, key=by_time))
        self.text_events = list(merge(*text_by_track, key=by_time))
        self.markers = list(merge(*markers_by_track, key=by_time))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment