Created
December 8, 2017 14:56
-
-
Save keunwoochoi/cee596f2c5993110af7d5f2ec5c20129 to your computer and use it in GitHub Desktop.
Script to debug kapre's spectrogram, copied from an IPython notebook.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib | |
%matplotlib inline | |
import matplotlib.pyplot as plt | |
plt.style.use('ggplot') | |
import numpy as np | |
import librosa | |
import keras | |
from keras import backend as K | |
print(keras.__version__) | |
print('Keras backend: ', keras.backend._BACKEND, ' and image dim ordering: ', keras.backend.image_dim_ordering()) | |
import kapre | |
if keras.backend._BACKEND == 'tensorflow': | |
import tensorflow | |
print(tensorflow.__version__) | |
else: | |
import theano | |
print(theano.__version__) | |
from keras.models import Sequential | |
from librosa import display | |
# Analysis parameters shared by the kapre and librosa spectrogram computations.
duration = 1.0          # passed to librosa.load as the amount of audio to read
n_dft = 1024            # DFT size
border_mode = 'same'
# Floor division keeps these integer-valued on Python 3 as well as Python 2;
# n_hop is later handed to both kapre and librosa as the hop size.
n_hop = n_dft // 3
n_filter = n_dft // 2 + 1
power = 1.0
dim_ordering = 'tf'
# Channel axis position: 1 for 'th' ordering, 3 otherwise.
ch_axis_idx = 1 if dim_ordering == 'th' else 3
sr = 32000

# Load whatever
src, sr = librosa.load('../srcs/bensound-cute.mp3', mono=False, duration=duration, sr=sr)
# Bare expression: echoes the array shape when run as the last line of a notebook cell.
src.shape
# Compute the spectrogram with kapre by wrapping its Spectrogram layer in a
# one-layer Keras model and running a single-item batch through it.
from kapre.time_frequency import Spectrogram

model = Sequential()
# input_shape is the shape of one loaded clip
# (presumably (n_channels, n_samples) since mono=False -- TODO confirm).
model.add(Spectrogram(n_dft=n_dft, n_hop=n_hop, input_shape=src.shape, power_spectrogram=2.0))

# np.newaxis prepends the batch axis, so the batch holds exactly one clip.
stft_per_channels = model.predict(src[np.newaxis, :, :])
# stft_per_channels = librosa.logamplitude(stft_per_channels, ref_power=1.0)
print('Shape of a batch (batch_size == 1 though): {}'.format(stft_per_channels.shape))
# Reference spectrograms computed with librosa, using the same DFT size and hop.
# Import the submodule explicitly: a bare `import scipy` does not guarantee that
# `scipy.signal` is loaded.
import scipy.signal

# Periodic Hann window of length n_dft (fftbins=True is the sym=False variant).
window = scipy.signal.get_window('hann', n_dft, fftbins=True)

# Power spectrogram (|STFT|^2) of the first channel.
stft_librosa_l = np.abs(librosa.stft(src[0], n_fft=n_dft, hop_length=n_hop, window=window)) ** 2
# Log-scaled power spectrogram of the second channel.
# NOTE(review): librosa.logamplitude / ref_power belong to older librosa releases;
# newer ones provide power_to_db(..., ref=...) instead -- confirm the target version.
stft_librosa_r = librosa.logamplitude(
    np.abs(librosa.stft(src[1], n_fft=n_dft, hop_length=n_hop, window=window)) ** 2,
    ref_power=1.0)
print("shape of librosa result: {}".format(stft_librosa_l.shape))
# Plot the absolute difference between kapre's spectrogram (batch item 0,
# last-axis index 0) and librosa's first-channel spectrogram.
# The full 2-D residual is passed to specshow; indexing a single row out of it
# (as the earlier `[10]` did) leaves a 1-D array that cannot be drawn as an image.
residual = np.abs(stft_per_channels[0, :, :, 0] - stft_librosa_l)
librosa.display.specshow(residual, y_axis='linear', sr=sr)
plt.title('residual')
plt.colorbar()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In this line:
librosa.display.specshow(np.abs(stft_per_channels[0, :, :, 0] - stft_librosa_l)[10], y_axis='linear', sr=sr) there is a `[10]` index. I think this should be removed, shouldn't it?
Or maybe you meant to write [:10] in order to highlight the differences?
Anyway, I got the idea. So thanks for the gist.