Skip to content

Instantly share code, notes, and snippets.

@lostanlen
Created June 16, 2020 13:10
Show Gist options
  • Save lostanlen/6abbfdf505df1779e7b67940e0631057 to your computer and use it in GitHub Desktop.
Save lostanlen/6abbfdf505df1779e7b67940e0631057 to your computer and use it in GitHub Desktop.
Melodyplot: a waveplot in the piano-roll domain
import librosa
from librosa.display import specshow
import matplotlib
from matplotlib import pyplot as plt
import numpy as np
import os
import scipy.signal
%matplotlib inline
## USER CODE
def generate_melodyplot_example():
    """Build a toy monophonic melody from librosa's "Sir Duke" example clip.

    Loads ``sir_duke_fast.mp3`` from the ``docs/examples/audio`` directory
    that sits next to the imported ``librosa`` package, estimates one
    frequency per STFT frame with a minimalist peak picker, discards large
    frame-to-frame jumps, and replaces every remaining contour by a single
    note rounded to the nearest MIDI pitch.

    Returns
    -------
    y : np.ndarray
        Audio samples, as returned by ``librosa.load``.
    sr : number
        Sampling rate, as returned by ``librosa.load``.
    melody : np.ndarray
        One frequency in Hz per STFT frame; NaN where no note is active.
    hop_length : int
        Hop length, in samples, between consecutive frames of ``melody``.
    """
    hop_length = 512
    win_length = 2048
    n_fft = 2048
    min_height = -30  # in dB wrt maximum amplitude
    max_jump = 15  # in Hz; larger frame-to-frame jumps are discarded

    # The example audio is looked up two directory levels above
    # librosa/__init__.py, i.e. next to the package directory.
    librosa_dir = os.path.dirname(
        os.path.dirname(os.path.normpath(librosa.__file__)))
    examples_dir = os.path.join(librosa_dir, "docs", "examples", "audio")
    example_name = "sir_duke_fast.mp3"
    example_path = os.path.join(examples_dir, example_name)
    y, sr = librosa.load(example_path)

    # Minimalist fundamental frequency estimator: in every frame, keep the
    # lowest-frequency spectral peak that rises above `min_height`.
    S = librosa.stft(
        y, hop_length=hop_length, win_length=win_length, n_fft=n_fft)
    S_dB = librosa.amplitude_to_db(np.abs(S), ref=np.max)
    stft_frequencies = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    frequencies = np.full(S_dB.shape[1], np.nan)
    for frame_index, S_frame in enumerate(S_dB.T):
        peaks = scipy.signal.find_peaks(S_frame, height=min_height)[0]
        if peaks.size:
            frequencies[frame_index] = stft_frequencies[peaks[0]]

    # Post-process fundamental frequencies by removing large jumps.
    # A plain comparison is used instead of `np.less(..., where=...)`:
    # without an `out=` array the masked-out entries of that call are
    # uninitialised memory. Comparisons against NaN are simply False.
    freq_absdiff = np.abs(np.diff(frequencies))
    with np.errstate(invalid="ignore"):
        too_large = freq_absdiff > max_jump
    frequencies[:-1][too_large] = np.nan

    # Segment pitch contours into maximal runs of non-NaN frames.
    # Padding with False on both sides (rather than np.roll, which wraps
    # around) guarantees that every start has a matching exclusive stop,
    # even when the signal begins or ends inside a contour.
    voiced = ~np.isnan(frequencies)
    edges = np.diff(np.concatenate(([False], voiced, [False])).astype(int))
    segment_starts = np.flatnonzero(edges == 1)
    segment_stops = np.flatnonzero(edges == -1)

    melody = np.full(len(frequencies), np.nan)
    if segment_starts.size == 0:
        # No voiced frame at all: the melody stays entirely NaN.
        return y, sr, melody, hop_length

    # Quantize each contour to the nearest MIDI pitch. The rounding step is
    # required: Hz -> MIDI -> Hz without it is an identity mapping.
    avg_frequencies = np.array([
        frequencies[start:stop].mean()
        for start, stop in zip(segment_starts, segment_stops)])
    quantized_frequencies = librosa.midi_to_hz(
        np.round(librosa.hz_to_midi(avg_frequencies)))

    # Build melody: every note is held until the next note starts; the last
    # note ends where its own contour ends.
    note_stops = np.concatenate((segment_starts[1:], segment_stops[-1:]))
    for start, stop, freq in zip(
            segment_starts, note_stops, quantized_frequencies):
        melody[start:stop] = freq
    return y, sr, melody, hop_length
y, sr, melody, hop_length = generate_melodyplot_example()

## LIBRARY CODE
# Length (in samples) of the frames used for the amplitude envelope.
# Passed explicitly because recent librosa releases require `frame_length`
# as a keyword; with hop_length=512 this equals the former default of 2048.
frame_length = 4 * hop_length

# Segment melody into maximal runs of non-NaN frames.
# `segment_stops` are exclusive end indices, so melody[start:stop] covers the
# whole run (single-frame runs included) and starts/stops/contours stay
# aligned one-to-one. Padding with False avoids np.roll's wrap-around.
melody_indicator = ~np.isnan(melody)
melody_edges = np.diff(
    np.concatenate(([False], melody_indicator, [False])).astype(int))
segment_starts = np.flatnonzero(melody_edges == 1)
segment_stops = np.flatnonzero(melody_edges == -1)
contours = [melody[start:stop]
            for start, stop in zip(segment_starts, segment_stops)]
avg_freqs = list(map(np.mean, contours))
sample_starts = librosa.frames_to_samples(segment_starts, hop_length=hop_length)
sample_stops = librosa.frames_to_samples(segment_stops, hop_length=hop_length)

# Per-frame maximum of the waveform, and the melody frequency at the frame
# where that maximum is largest (restricted to frames carrying a note).
envelope = np.max(
    librosa.util.frame(y, frame_length=frame_length, hop_length=hop_length), 0)
trimmed_indicator = melody_indicator[:len(envelope)]
argmax_envelope = np.argmax(envelope[trimmed_indicator])
max_envelope = envelope[trimmed_indicator][argmax_envelope]
freq_max_envelope = melody[:len(envelope)][trimmed_indicator][argmax_envelope]

plt.figure(figsize=(15, 8))
for start, stop, contour in zip(segment_starts, segment_stops, contours):
    sample_start = librosa.frames_to_samples(start, hop_length=hop_length)
    sample_stop = librosa.frames_to_samples(stop, hop_length=hop_length)
    y_contour = y[sample_start:sample_stop]
    if len(y_contour) < frame_length:
        # Zero-pad at the END only, up to exactly one frame, so that the
        # waveform is not shifted in time relative to the contour.
        y_contour = np.pad(
            y_contour, (0, frame_length - len(y_contour)), mode='constant')
    y_contour_waveplot = np.max(
        librosa.util.frame(
            y_contour, frame_length=frame_length, hop_length=hop_length), 0)
    # NOTE(review): `multiplier` is computed but not applied to the plot yet;
    # the fill below still uses the ad hoc factor of 50.
    multiplier = 2**(1/12) * max_envelope * freq_max_envelope/np.mean(contour)
    offset = librosa.frames_to_time(start, sr=sr, hop_length=hop_length)
    locs = offset + librosa.frames_to_time(
        np.arange(len(y_contour_waveplot)), sr=sr, hop_length=hop_length)
    plt.fill_between(locs,
        contour[:len(y_contour_waveplot)]-50*y_contour_waveplot,
        contour[:len(y_contour_waveplot)]+50*y_contour_waveplot) # this 50 is ad hoc

# Axis scaling and tick decoration reuse private helpers of librosa.display;
# being private, they may change between librosa releases.
axes = plt.gca()
librosa.display.__scale_axes(axes, "s", 'x')
librosa.display.__scale_axes(axes, "cqt_note", 'y')
librosa.display.__decorate_axis(axes.xaxis, "s")
librosa.display.__decorate_axis(axes.yaxis, "cqt_note")
plt.title("Sir Duke (fast)")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment