Created
April 19, 2019 14:03
-
-
Save glegrain/bebc5a801c0896c9429dab9cb4dfaaf1 to your computer and use it in GitHub Desktop.
Python LogMel feature extraction code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# This software component is licensed by ST under BSD 3-Clause license, | |
# the "License"; You may not use this file except in compliance with the | |
# License. You may obtain a copy of the License at: | |
# https://opensource.org/licenses/BSD-3-Clause | |
"""LogMel Feature Extraction example.""" | |
import numpy as np | |
import sys | |
import librosa | |
import librosa.display | |
import scipy.fftpack as fft | |
# Sampling rate handed to librosa when building the mel filter bank.
SR = 16000
# FFT length; create_col also requires every input frame to have this many
# samples.
N_FFT = 1024
# Number of mel bands produced for each frame.
N_MELS = 30
def create_col(y):
    """Compute one mel power-spectrum column from a single audio frame.

    The frame is multiplied by a Hann window, transformed with an FFT, reduced
    to the first ``N_FFT // 2 + 1`` bins, converted to power (squared
    magnitude) and projected onto ``N_MELS`` mel filters.

    Args:
        y: One-dimensional array-like holding exactly ``N_FFT`` samples.

    Returns:
        ``numpy.ndarray`` of shape ``(N_MELS,)`` and dtype ``float32``.

    Raises:
        ValueError: If ``y`` does not have shape ``(N_FFT,)``.
    """
    y = np.asarray(y)
    if y.shape != (N_FFT,):
        raise ValueError(
            "expected a frame of shape (%d,), got %r" % (N_FFT, y.shape)
        )

    # Number of bins kept from the FFT output (DC up to and including N_FFT/2).
    n_bins = N_FFT // 2 + 1

    # Create time-series window
    fft_window = librosa.filters.get_window('hann', N_FFT, fftbins=True)
    assert fft_window.shape == (N_FFT,), fft_window.shape

    # Hann window
    y_windowed = fft_window * y

    # FFT, keeping only the first n_bins outputs
    fft_out = fft.fft(y_windowed, axis=0)[:n_bins]
    assert fft_out.shape == (n_bins,), fft_out.shape

    # Power spectrum
    S_pwr = np.abs(fft_out) ** 2

    # Generation of Mel Filter Banks.
    # `sr` is passed by keyword: newer librosa releases reject it as a
    # positional argument, while older releases accept the keyword form too.
    mel_basis = librosa.filters.mel(
        sr=SR, n_fft=N_FFT, n_mels=N_MELS, htk=False
    )
    assert mel_basis.shape == (N_MELS, n_bins), mel_basis.shape

    # Apply Mel Filter Banks. `astype` returns a new array, so its result must
    # be kept; the original discarded it and the cast never took effect.
    S_mel = np.dot(mel_basis, S_pwr).astype(np.float32)
    assert S_mel.shape == (N_MELS,), S_mel.shape
    return S_mel
def feature_extraction(y):
    """Build a log-mel spectrogram (in dB) from a stack of audio frames.

    Each row of ``y`` is turned into one spectrogram column by ``create_col``.
    The result is scaled by its maximum value and converted to decibels with
    ``librosa.power_to_db`` using ``top_db=80.0``.

    Args:
        y: Two-dimensional array-like of shape ``(n_frames, N_FFT)`` with at
            least one frame. The original fixed layout of 32 frames is still
            accepted; other frame counts are now supported as well.

    Returns:
        ``numpy.ndarray`` of shape ``(N_MELS, n_frames)`` holding dB values.

    Raises:
        ValueError: If ``y`` is not 2-D, has no frames, or its rows do not
            contain ``N_FFT`` samples.
    """
    y = np.asarray(y)
    if y.ndim != 2 or y.shape[0] == 0 or y.shape[1] != N_FFT:
        raise ValueError(
            "expected input of shape (n_frames, %d) with n_frames >= 1, "
            "got %r" % (N_FFT, y.shape)
        )

    n_frames = y.shape[0]
    S_mel = np.empty((N_MELS, n_frames), dtype=np.float32, order='C')
    for col_index, frame in enumerate(y):
        S_mel[:, col_index] = create_col(frame)

    # Scale according to reference power. An all-zero (silent) input has a
    # maximum of 0; dividing by it would fill the result with NaN, so scaling
    # is skipped in that case.
    peak = S_mel.max()
    if peak > 0:
        S_mel = S_mel / peak

    # Convert to dB
    S_log_mel = librosa.power_to_db(S_mel, top_db=80.0)
    assert S_log_mel.shape == (N_MELS, n_frames), S_log_mel.shape
    return S_log_mel
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, can I also use this log-mel spectrogram code for 1-second audio?