Rough transcription from audio to MIDI using a constant-Q transform (CQT) and magnitude thresholding.
from __future__ import division  # must come before all other imports

import librosa
import numpy as np
import pretty_midi
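
# The main script below calls piano_roll_to_pretty_midi(), which is not defined
# in this gist. The sketch below is an assumed helper, adapted from the
# reverse_pianoroll example that ships with pretty_midi: it scans the piano
# roll for velocity changes and emits a pretty_midi.Note for each
# note-on / note-off pair, treating the row index as the MIDI pitch.
def piano_roll_to_pretty_midi(piano_roll, fs=100, program=0):
    notes, frames = piano_roll.shape
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=program)

    # pad one empty frame on each side so onsets/offsets at the edges register
    piano_roll = np.pad(piano_roll, [(0, 0), (1, 1)], 'constant')

    # a nonzero difference along time marks a note-on or note-off event
    velocity_changes = np.nonzero(np.diff(piano_roll).T)

    # track the onset time and velocity of the currently sounding note per pitch
    prev_velocities = np.zeros(notes, dtype=int)
    note_on_time = np.zeros(notes)

    for time, note in zip(*velocity_changes):
        velocity = piano_roll[note, time + 1]  # +1 compensates for the padding
        time = time / fs
        if velocity > 0:
            if prev_velocities[note] == 0:
                note_on_time[note] = time
                prev_velocities[note] = velocity
        else:
            instrument.notes.append(pretty_midi.Note(
                velocity=int(prev_velocities[note]),
                pitch=int(note),
                start=note_on_time[note],
                end=time))
            prev_velocities[note] = 0

    pm.instruments.append(instrument)
    return pm
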
# Parameters (hard-coded here; these could be exposed as CLI arguments)
audio_fn = '../data/wav/Bounce.wav'
sr = 22050        # sample rate for loading the audio
min_midi = 24     # lowest MIDI note analysed (C1)
max_midi = 107    # one above the highest MIDI note analysed
threshold = 64    # minimum scaled magnitude (0-127) to count as a note
# Load the audio and compute its constant-Q transform
y, _ = librosa.load(audio_fn, sr=sr)
C = librosa.cqt(y, sr=sr, fmin=librosa.midi_to_hz(min_midi),
                n_bins=max_midi - min_midi)
# CQT magnitudes -> piano roll
cdb = np.abs(C)
# shift up by |min| so all values are non-negative, then scale into the
# MIDI velocity range 0-127
cdb_min = cdb.min()
if cdb_min < 0:
    cdb_min = -cdb_min
cdb += cdb_min
cdb *= (127.0 / cdb.max())
# zero out anything below the threshold so only strong bins become notes
cdb[cdb < threshold] = 0
piano_roll = cdb
# pad to 128 rows so the row index equals the MIDI note number
# (min_midi empty rows below the CQT bins, 128 - max_midi above)
piano_roll = np.pad(piano_roll, [(min_midi, 128 - max_midi), (0, 0)], 'constant')
# duration of the audio in seconds
audio_time = len(y) / sr
# frame rate of the piano roll: CQT frames per second (depends on the hop length)
fs = piano_roll.shape[1] / audio_time
pm = piano_roll_to_pretty_midi(piano_roll,fs=fs)
pm.write('/tmp/test.mid')
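
To sanity-check the output, the written file can be loaded back with pretty_midi and its notes inspected (or synthesized). The snippet below is a minimal example of that, assuming the script above has already produced /tmp/test.mid.

import pretty_midi

pm = pretty_midi.PrettyMIDI('/tmp/test.mid')
for instrument in pm.instruments:
    # print the first few detected notes: pitch, start/end time (s), velocity
    for note in instrument.notes[:10]:
        print(note.pitch, round(note.start, 3), round(note.end, 3), note.velocity)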