bmcfee · April 19, 2019 13:59
diff --git a/plot_pcen_stream.ipynb b/plot_pcen_stream.ipynb
 {
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# PCEN Streaming\n\n\nThis notebook demonstrates how to use `soundfile` streaming IO with `librosa.pcen` \nto do dynamic per-channel energy normalization on a spectrogram.\n\nThis is useful when processing long audio files that are too large to load all at\nonce, or when streaming data from a recording device.\n\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "We'll need numpy and matplotlib for this example, along with\n`soundfile` for streaming audio IO and `librosa` for the analysis.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from __future__ import print_function\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nimport soundfile as sf\n\nimport librosa\n# Import the display submodule explicitly; the plotting cell calls\n# librosa.display.specshow\nimport librosa.display"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "First, we'll start with an audio file that we want to stream\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Use librosa's example audio file as the signal to stream\nfilename = librosa.util.example_audio_file()\n\n# Ask soundfile for the sampling rate of the file; the samples\n# themselves are streamed in blocks further down\naudio_info = sf.info(filename)\nsr = audio_info.samplerate"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Next, we'll set up the block reader to work on short segments of \naudio at a time.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Frame parameters: 4096-sample frames with 50% overlap between\n# consecutive frames.\nn_fft = 4096\nhop_length = n_fft // 2\n\n# Each block read from disk holds 16 frames: one full frame plus\n# 15 additional hops.\nframes_per_block = 16\nblock_size = n_fft + (frames_per_block - 1) * hop_length\n\n# To make the last frame of one block overlap properly with the\n# first frame of the next, soundfile has to rewind the signal after\n# each block; the `overlap` parameter says by how many samples.\nblock_overlap = n_fft - hop_length\n\n# fill_value=0 pads out the last frame with zeros so that we have a\n# full frame at the end of the signal, even if the signal doesn't\n# divide evenly into full frames.\nblocks = sf.blocks(filename,\n                   blocksize=block_size,\n                   overlap=block_overlap,\n                   fill_value=0)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "For this example, we'll compute PCEN on each block, average over\nfrequency, and store the results in a list.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Collect the frequency-averaged PCEN values in a list, one per frame\npcen_blocks = []\n\n# Initialize the PCEN filter delays to steady state\nzi = None\n\n# Wrap the loop in try/finally so that the block reader is closed\n# even if processing one of the blocks raises an exception\ntry:\n    for block in blocks:\n        # downmix frame to mono (averaging out the channel dimension)\n        y = librosa.to_mono(block.T)\n\n        # Compute the STFT (without padding, so center=False)\n        D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,\n                         center=False)\n\n        # Compute PCEN on the magnitude spectrum, using initial delays\n        # returned from our previous call (if any)\n        # store the final delays for use as zi in the next iteration\n        P, zi = librosa.pcen(np.abs(D), sr=sr, hop_length=hop_length,\n                             zi=zi, return_zf=True)\n\n        # Compute the average PCEN over frequency, and add the\n        # per-frame values to our list\n        pcen_blocks.extend(np.mean(P, axis=0))\nfinally:\n    # Close the block reader\n    blocks.close()\n\n# Cast to a numpy array for use downstream\npcen_blocks = np.asarray(pcen_blocks)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "For the sake of comparison, let's see how it would look had we \nrun PCEN on the entire spectrum without block-wise processing\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Load the whole signal at the sampling rate reported by soundfile\n# earlier, so that the full and block-wise analyses use the same rate\ny, sr = librosa.load(filename, sr=sr)\n\n# Keep the same parameters as before\nD = librosa.stft(y, n_fft=n_fft, hop_length=hop_length, center=False)\n\n# Compute pcen on the magnitude spectrum.\n# We don't need to worry about initial and final filter delays if\n# we're doing everything in one go.\nP = librosa.pcen(np.abs(D), sr=sr, hop_length=hop_length)\n\n# Average over frequency to get one value per frame\npcen_full = np.mean(P, axis=0)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Plot the PCEN spectrum and the resulting magnitudes\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "plt.figure()\n\n# Top panel: the PCEN spectrum\nax_spec = plt.subplot(2, 1, 1)\nlibrosa.display.specshow(P, sr=sr, hop_length=hop_length,\n                         x_axis='time', y_axis='log')\nax_spec.set_title('PCEN spectrum')\n\n# Bottom panel: the frequency-averaged PCEN curves, sharing the\n# time axis with the spectrum above\nax_curves = plt.subplot(2, 1, 2, sharex=ax_spec)\ntimes_full = librosa.times_like(pcen_full, sr=sr, hop_length=hop_length)\nax_curves.plot(times_full, pcen_full, linewidth=3, alpha=0.25,\n               label='Full signal PCEN')\ntimes_blocks = librosa.times_like(pcen_blocks, sr=sr, hop_length=hop_length)\nax_curves.plot(times_blocks, pcen_blocks, linestyle=':',\n               label='Block-wise PCEN')\nax_curves.legend()\n\n# Zoom in to a short patch to see the fine details\nax_curves.set_xlim([30, 40])\n\n# Lay out and render the figure\nplt.tight_layout()\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.8"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"%matplotlib inline"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"\n# PCEN Streaming\n\n\nThis notebook demonstrates how to use `soundfile` streaming IO with `librosa.pcen` \nto do dynamic per-channel energy normalization on a spectrogram.\n\nThis is useful when processing long audio files that are too large to load all at\nonce, or when streaming data from a recording device.\n\n"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"We'll need numpy and matplotlib for this example, along with\n`soundfile` for streaming audio IO and `librosa` for the analysis.\n\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"from __future__ import print_function\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nimport soundfile as sf\n\nimport librosa\n# Import the display submodule explicitly; the plotting cell calls\n# librosa.display.specshow\nimport librosa.display"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"First, we'll start with an audio file that we want to stream\n\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Use librosa's example audio file as the signal to stream\nfilename = librosa.util.example_audio_file()\n\n# Ask soundfile for the sampling rate of the file; the samples\n# themselves are streamed in blocks further down\naudio_info = sf.info(filename)\nsr = audio_info.samplerate"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Next, we'll set up the block reader to work on short segments of \naudio at a time.\n\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Frame parameters: 4096-sample frames with 50% overlap between\n# consecutive frames.\nn_fft = 4096\nhop_length = n_fft // 2\n\n# Each block read from disk holds 16 frames: one full frame plus\n# 15 additional hops.\nframes_per_block = 16\nblock_size = n_fft + (frames_per_block - 1) * hop_length\n\n# To make the last frame of one block overlap properly with the\n# first frame of the next, soundfile has to rewind the signal after\n# each block; the `overlap` parameter says by how many samples.\nblock_overlap = n_fft - hop_length\n\n# fill_value=0 pads out the last frame with zeros so that we have a\n# full frame at the end of the signal, even if the signal doesn't\n# divide evenly into full frames.\nblocks = sf.blocks(filename,\n                   blocksize=block_size,\n                   overlap=block_overlap,\n                   fill_value=0)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"For this example, we'll compute PCEN on each block, average over\nfrequency, and store the results in a list.\n\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Collect the frequency-averaged PCEN values in a list, one per frame\npcen_blocks = []\n\n# Initialize the PCEN filter delays to steady state\nzi = None\n\n# Wrap the loop in try/finally so that the block reader is closed\n# even if processing one of the blocks raises an exception\ntry:\n    for block in blocks:\n        # downmix frame to mono (averaging out the channel dimension)\n        y = librosa.to_mono(block.T)\n\n        # Compute the STFT (without padding, so center=False)\n        D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,\n                         center=False)\n\n        # Compute PCEN on the magnitude spectrum, using initial delays\n        # returned from our previous call (if any)\n        # store the final delays for use as zi in the next iteration\n        P, zi = librosa.pcen(np.abs(D), sr=sr, hop_length=hop_length,\n                             zi=zi, return_zf=True)\n\n        # Compute the average PCEN over frequency, and add the\n        # per-frame values to our list\n        pcen_blocks.extend(np.mean(P, axis=0))\nfinally:\n    # Close the block reader\n    blocks.close()\n\n# Cast to a numpy array for use downstream\npcen_blocks = np.asarray(pcen_blocks)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"For the sake of comparison, let's see how it would look had we \nrun PCEN on the entire spectrum without block-wise processing\n\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Load the whole signal at the sampling rate reported by soundfile\n# earlier, so that the full and block-wise analyses use the same rate\ny, sr = librosa.load(filename, sr=sr)\n\n# Keep the same parameters as before\nD = librosa.stft(y, n_fft=n_fft, hop_length=hop_length, center=False)\n\n# Compute pcen on the magnitude spectrum.\n# We don't need to worry about initial and final filter delays if\n# we're doing everything in one go.\nP = librosa.pcen(np.abs(D), sr=sr, hop_length=hop_length)\n\n# Average over frequency to get one value per frame\npcen_full = np.mean(P, axis=0)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Plot the PCEN spectrum and the resulting magnitudes\n\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"plt.figure()\n\n# Top panel: the PCEN spectrum\nax_spec = plt.subplot(2, 1, 1)\nlibrosa.display.specshow(P, sr=sr, hop_length=hop_length,\n                         x_axis='time', y_axis='log')\nax_spec.set_title('PCEN spectrum')\n\n# Bottom panel: the frequency-averaged PCEN curves, sharing the\n# time axis with the spectrum above\nax_curves = plt.subplot(2, 1, 2, sharex=ax_spec)\ntimes_full = librosa.times_like(pcen_full, sr=sr, hop_length=hop_length)\nax_curves.plot(times_full, pcen_full, linewidth=3, alpha=0.25,\n               label='Full signal PCEN')\ntimes_blocks = librosa.times_like(pcen_blocks, sr=sr, hop_length=hop_length)\nax_curves.plot(times_blocks, pcen_blocks, linestyle=':',\n               label='Block-wise PCEN')\nax_curves.legend()\n\n# Zoom in to a short patch to see the fine details\nax_curves.set_xlim([30, 40])\n\n# Lay out and render the figure\nplt.tight_layout()\nplt.show()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.8"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}