Created
January 15, 2014 20:20
-
-
Save bmcfee/8443775 to your computer and use it in GitHub Desktop.
Vectorizing stft for great justice
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import librosa" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from librosa.core import *\n", | |
"\n", | |
"def stft_new(y, n_fft=2048, hop_length=None, win_length=None, window=None):\n", | |
" \"\"\"Short-time Fourier transform.\n", | |
"\n", | |
" Returns a complex-valued matrix D such that\n", | |
" - ``np.abs(D[f, t])`` is the magnitude of frequency bin ``f`` at time ``t``\n", | |
" - ``np.angle(D[f, t])`` is the phase of frequency bin ``f`` at time ``t``\n", | |
"\n", | |
" :usage:\n", | |
" >>> y, sr = librosa.load('file.wav')\n", | |
" >>> D = librosa.stft(y)\n", | |
"\n", | |
" :parameters:\n", | |
" - y : np.ndarray\n", | |
" the input signal (audio time series)\n", | |
"\n", | |
" - n_fft : int\n", | |
" FFT window size\n", | |
"\n", | |
" - hop_length : int\n", | |
" number audio of frames between STFT columns.\n", | |
" If unspecified, defaults ``win_length / 4``.\n", | |
"\n", | |
" - win_length : int <= n_fft\n", | |
" Each frame of audio is windowed by the ``window`` function (see below).\n", | |
" The window will be of length ``win_length`` and then padded with zeros\n", | |
" to match ``n_fft``.\n", | |
"\n", | |
" If unspecified, defaults to ``win_length = n_fft``.\n", | |
"\n", | |
" - window : None, function, np.ndarray\n", | |
" - None (default): use an asymmetric Hann window\n", | |
" - a window function, such as ``scipy.signal.hanning``\n", | |
" - a vector or array of length ``n_fft``\n", | |
"\n", | |
" :returns:\n", | |
" - D : np.ndarray, dtype=complex\n", | |
" STFT matrix\n", | |
"\n", | |
" \"\"\"\n", | |
"\n", | |
" # By default, use the entire frame\n", | |
" if win_length is None:\n", | |
" win_length = n_fft\n", | |
"\n", | |
" # Set the default hop, if it's not already specified\n", | |
" if hop_length is None:\n", | |
" hop_length = int(win_length / 4)\n", | |
"\n", | |
" n_specbins = 1 + int(n_fft / 2)\n", | |
" n_frames = 1 + int( (len(y) - n_fft) / hop_length)\n", | |
"\n", | |
" if window is None:\n", | |
" # Default is an asymmetric Hann window\n", | |
" fft_window = scipy.signal.hann(win_length, sym=False)\n", | |
"\n", | |
" elif hasattr(window, '__call__'):\n", | |
" # User supplied a window function\n", | |
" fft_window = window(win_length)\n", | |
"\n", | |
" else:\n", | |
" # User supplied a window vector.\n", | |
" # Make sure it's an array:\n", | |
" fft_window = np.asarray(window)\n", | |
"\n", | |
" # validate length compatibility\n", | |
" if fft_window.size != n_fft:\n", | |
" raise ValueError('Size mismatch between n_fft and len(window)')\n", | |
"\n", | |
" # Pad the window out to n_fft size\n", | |
" lpad = (n_fft - win_length)/2\n", | |
" fft_window = np.pad(fft_window, (lpad, n_fft - win_length - lpad), mode='constant')\n", | |
" fft_window = fft_window.reshape((1,-1))\n", | |
" # allocate output array\n", | |
" #stft_matrix = np.empty( (n_specbins, n_frames), dtype=np.complex64)\n", | |
"\n", | |
" # window up the time series\n", | |
" data_matrix = np.empty( (n_frames, n_fft), dtype=y.dtype)\n", | |
" \n", | |
" for i in xrange(n_frames):\n", | |
" sample = i * hop_length\n", | |
" #frame = fft.fft(fft_window * y[sample:(sample+n_fft)])\n", | |
"\n", | |
" # Conjugate here to match phase from DPWE code\n", | |
" #stft_matrix[:, i] = frame[:n_specbins].conj()\n", | |
" data_matrix[i] = y[sample:(sample+n_fft)]\n", | |
" \n", | |
" stft_matrix = fft.rfft(fft_window * data_matrix, axis=-1).conj().astype(np.complex64)\n", | |
"\n", | |
" return stft_matrix.T\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"tracks = librosa.util.get_audio_files('/home/bmcfee/data/CAL500/mp3/')\n", | |
"print tracks[4]\n", | |
"y, sr = librosa.load(tracks[4])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"/home/bmcfee/data/CAL500/mp3/aaron_neville-tell_it_like_it_is.mp3\n" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"%%timeit\n", | |
"Z1 = librosa.core.stft(y, hop_length=64)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1 loops, best of 3: 4.12 s per loop\n" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"%%timeit\n", | |
"Z2 = stft_new(y, hop_length=64)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1 loops, best of 3: 2.43 s per loop\n" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"Z1 = librosa.core.stft(y, hop_length=64)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"Z2 = stft_new(y, hop_length=64)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"np.allclose(Z1, Z2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": [ | |
"True" | |
] | |
} | |
], | |
"prompt_number": 10 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment