Last active
April 19, 2018 22:21
-
-
Save jsleep/5d0c438124cfb59a4b0687d984e31606 to your computer and use it in GitHub Desktop.
Rough audio-to-MIDI transcription using a constant-Q transform (CQT) and magnitude thresholding.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division

import librosa
import numpy as np
import pretty_midi
# Rough audio -> MIDI transcription: take a constant-Q transform of the audio,
# scale the magnitudes to MIDI velocity range, silence everything below a
# threshold, and turn the resulting piano roll into a MIDI file.

# CLI parameters
audio_fn = '../data/wav/Bounce.wav'  # input audio file
sr = 22050       # sample rate passed to librosa for loading and analysis
min_midi = 24    # lowest MIDI pitch analysed (inclusive)
max_midi = 107   # upper pitch bound (exclusive): bins cover min_midi..max_midi-1
threshold = 64   # scaled magnitudes (0-127) below this value are zeroed


def piano_roll_to_pretty_midi(piano_roll, fs=100, program=0):
    """Convert a piano roll array into a ``pretty_midi.PrettyMIDI`` object.

    Parameters
    ----------
    piano_roll : np.ndarray, shape (n_pitches, n_frames)
        Row index is the MIDI pitch, column index is the time frame, and the
        value is the note velocity (0 means silent).
    fs : float
        Number of piano-roll frames per second.
    program : int
        MIDI program number of the single instrument that is created.

    Returns
    -------
    pretty_midi.PrettyMIDI
        Object holding one instrument with one note per run of non-zero
        frames on a pitch.
    """
    n_pitches = piano_roll.shape[0]
    midi = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=program)

    # One silent frame on each side, so notes that sound in the very first or
    # very last frame still produce an on/off edge in the difference below.
    padded = np.pad(piano_roll, [(0, 0), (1, 1)], 'constant')

    # Frame-to-frame changes, ordered by time: (frame index, pitch index).
    frames, pitches = np.nonzero(np.diff(padded).T)

    # Velocity of the currently sounding note per pitch (0 == not sounding)
    # and the time at which that note started.
    active_velocity = np.zeros(n_pitches, dtype=int)
    onset_time = np.zeros(n_pitches)

    for frame, pitch in zip(frames, pitches):
        # `frame + 1` undoes the leading pad column added above.
        velocity = padded[pitch, frame + 1]
        time = frame / float(fs)
        if velocity > 0:
            # A velocity change while the note is already sounding does not
            # retrigger it; only the first rise from silence starts a note.
            if active_velocity[pitch] == 0:
                onset_time[pitch] = time
                # Clamp to the valid MIDI velocity range; the lower bound of 1
                # keeps 0 reserved as the "not sounding" marker.
                active_velocity[pitch] = max(1, min(127, int(velocity)))
        else:
            instrument.notes.append(pretty_midi.Note(
                velocity=int(active_velocity[pitch]),
                pitch=int(pitch),
                start=onset_time[pitch],
                end=time))
            active_velocity[pitch] = 0

    midi.instruments.append(instrument)
    return midi


# librosa calls
y, _ = librosa.load(audio_fn, sr=sr)
C = librosa.cqt(y, sr=sr, fmin=librosa.midi_to_hz(min_midi),
                n_bins=max_midi - min_midi)

# cqt to piano roll
cdb = np.abs(C)

# Scale magnitude to the range 0..127. The magnitudes are non-negative after
# np.abs, so no offset is needed before scaling; the guard avoids a division
# by zero (NaN/inf velocities) when the input is completely silent.
cdb_max = cdb.max()
if cdb_max > 0:
    cdb *= 127.0 / cdb_max
cdb[cdb < threshold] = 0
piano_roll = cdb

# Pad to 128 rows so that the row index equals the MIDI pitch: CQT bin 0 is
# pitch `min_midi`, so `min_midi` empty rows go BEFORE the data and the
# remaining `128 - max_midi` rows go after it.
piano_roll = np.pad(piano_roll, [(min_midi, 128 - max_midi), (0, 0)],
                    'constant')

# get audio time (seconds)
audio_time = len(y) / float(sr)

# Frame rate of the CQT output, estimated as frame count over audio duration.
fs = piano_roll.shape[1] / audio_time

pm = piano_roll_to_pretty_midi(piano_roll, fs=fs)
pm.write('/tmp/test.mid')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment