Skip to content

Instantly share code, notes, and snippets.

@bmcfee
Created January 15, 2014 20:20
Show Gist options
  • Save bmcfee/8443775 to your computer and use it in GitHub Desktop.
Save bmcfee/8443775 to your computer and use it in GitHub Desktop.
Vectorizing stft for great justice
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import librosa"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from librosa.core import *\n",
"\n",
"def stft_new(y, n_fft=2048, hop_length=None, win_length=None, window=None):\n",
" \"\"\"Short-time Fourier transform.\n",
"\n",
" Returns a complex-valued matrix D such that\n",
" - ``np.abs(D[f, t])`` is the magnitude of frequency bin ``f`` at time ``t``\n",
" - ``np.angle(D[f, t])`` is the phase of frequency bin ``f`` at time ``t``\n",
"\n",
" :usage:\n",
" >>> y, sr = librosa.load('file.wav')\n",
" >>> D = librosa.stft(y)\n",
"\n",
" :parameters:\n",
" - y : np.ndarray\n",
" the input signal (audio time series)\n",
"\n",
" - n_fft : int\n",
" FFT window size\n",
"\n",
" - hop_length : int\n",
" number audio of frames between STFT columns.\n",
" If unspecified, defaults ``win_length / 4``.\n",
"\n",
" - win_length : int <= n_fft\n",
" Each frame of audio is windowed by the ``window`` function (see below).\n",
" The window will be of length ``win_length`` and then padded with zeros\n",
" to match ``n_fft``.\n",
"\n",
" If unspecified, defaults to ``win_length = n_fft``.\n",
"\n",
" - window : None, function, np.ndarray\n",
" - None (default): use an asymmetric Hann window\n",
" - a window function, such as ``scipy.signal.hanning``\n",
" - a vector or array of length ``n_fft``\n",
"\n",
" :returns:\n",
" - D : np.ndarray, dtype=complex\n",
" STFT matrix\n",
"\n",
" \"\"\"\n",
"\n",
" # By default, use the entire frame\n",
" if win_length is None:\n",
" win_length = n_fft\n",
"\n",
" # Set the default hop, if it's not already specified\n",
" if hop_length is None:\n",
" hop_length = int(win_length / 4)\n",
"\n",
" n_specbins = 1 + int(n_fft / 2)\n",
" n_frames = 1 + int( (len(y) - n_fft) / hop_length)\n",
"\n",
" if window is None:\n",
" # Default is an asymmetric Hann window\n",
" fft_window = scipy.signal.hann(win_length, sym=False)\n",
"\n",
" elif hasattr(window, '__call__'):\n",
" # User supplied a window function\n",
" fft_window = window(win_length)\n",
"\n",
" else:\n",
" # User supplied a window vector.\n",
" # Make sure it's an array:\n",
" fft_window = np.asarray(window)\n",
"\n",
" # validate length compatibility\n",
" if fft_window.size != n_fft:\n",
" raise ValueError('Size mismatch between n_fft and len(window)')\n",
"\n",
" # Pad the window out to n_fft size\n",
" lpad = (n_fft - win_length)/2\n",
" fft_window = np.pad(fft_window, (lpad, n_fft - win_length - lpad), mode='constant')\n",
" fft_window = fft_window.reshape((1,-1))\n",
" # allocate output array\n",
" #stft_matrix = np.empty( (n_specbins, n_frames), dtype=np.complex64)\n",
"\n",
" # window up the time series\n",
" data_matrix = np.empty( (n_frames, n_fft), dtype=y.dtype)\n",
" \n",
" for i in xrange(n_frames):\n",
" sample = i * hop_length\n",
" #frame = fft.fft(fft_window * y[sample:(sample+n_fft)])\n",
"\n",
" # Conjugate here to match phase from DPWE code\n",
" #stft_matrix[:, i] = frame[:n_specbins].conj()\n",
" data_matrix[i] = y[sample:(sample+n_fft)]\n",
" \n",
" stft_matrix = fft.rfft(fft_window * data_matrix, axis=-1).conj().astype(np.complex64)\n",
"\n",
" return stft_matrix.T\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tracks = librosa.util.get_audio_files('/home/bmcfee/data/CAL500/mp3/')\n",
"print tracks[4]\n",
"y, sr = librosa.load(tracks[4])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"/home/bmcfee/data/CAL500/mp3/aaron_neville-tell_it_like_it_is.mp3\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%timeit\n",
"Z1 = librosa.core.stft(y, hop_length=64)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1 loops, best of 3: 4.12 s per loop\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%timeit\n",
"Z2 = stft_new(y, hop_length=64)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1 loops, best of 3: 2.43 s per loop\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Z1 = librosa.core.stft(y, hop_length=64)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Z2 = stft_new(y, hop_length=64)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"np.allclose(Z1, Z2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"True"
]
}
],
"prompt_number": 10
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment