Skip to content

Instantly share code, notes, and snippets.

@dsherret
Last active January 7, 2020 22:05
Show Gist options
  • Save dsherret/565fa14b4e417170918b1f8747fe8ad1 to your computer and use it in GitHub Desktop.
Save dsherret/565fa14b4e417170918b1f8747fe8ad1 to your computer and use it in GitHub Desktop.
// Adapted from: https://github.com/Sciss/SpeechRecognitionHMM/blob/master/src/main/java/org/ioe/tprsa/audio/feature/MFCC.java
using System;
using System.Diagnostics;
namespace CommonShared.Audio
{
/// <summary>
/// Reduces a per-bin FFT spectrum to <see cref="BufferSize"/> mel-spaced filter bank values.
/// </summary>
/// <remarks>
/// The filter edges are spaced evenly on the mel scale between the lower and upper frequency of the
/// configured range, converted to FFT bin indices once in the constructor and reused by every
/// <see cref="Process"/> call.
/// </remarks>
public class MelFrequencyBinner
{
    private const int _melFiltersCount = Constants.MEL_FILTERS_COUNT;
    private readonly double _sampleRate;
    private readonly double _upperFilterFreq;
    private readonly double _lowerFilterFreq;
    private readonly int _samplesPerFrame;

    // Filter edge bins: index 0 is the lower edge of the first filter, indices 1..count are the
    // filter centers, and the last index is the upper edge of the last filter.
    private readonly int[] _centerFrequencyIndices;

    /// <summary>
    /// Creates a binner for frames of <paramref name="samplesPerFrame"/> samples.
    /// </summary>
    /// <param name="samplesPerFrame">Number of samples per analysed frame; must be positive.</param>
    /// <param name="sampleRate">Sample rate used to map frequencies to FFT bins; must be positive.</param>
    /// <param name="frequencyRange">
    /// Frequency range covered by the filters. Falls back to <c>Constants.MEL_FREQUENCY_RANGE</c> when null.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="samplesPerFrame"/> or <paramref name="sampleRate"/> is not positive, or the
    /// range is inverted, starts below 25, or ends above <c>sampleRate / 2</c>.
    /// </exception>
    public MelFrequencyBinner(int samplesPerFrame, double sampleRate,
        FrequencyRange? frequencyRange = null)
    {
        if (samplesPerFrame <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(samplesPerFrame), "Samples per frame must be greater than zero");
        }

        // Written as a negated comparison so that NaN is rejected as well.
        if (!(sampleRate > 0))
        {
            throw new ArgumentOutOfRangeException(nameof(sampleRate), "Sample rate must be greater than zero");
        }

        FrequencyRange range = frequencyRange ?? Constants.MEL_FREQUENCY_RANGE;
        const double minimumFrequency = 25.0;
        double maxFrequency = sampleRate / 2.0;

        if (range.Min < minimumFrequency)
        {
            throw new ArgumentOutOfRangeException(nameof(frequencyRange), $"Frequency range must be greater or equal to {minimumFrequency}");
        }

        if (range.Max > maxFrequency)
        {
            throw new ArgumentOutOfRangeException(nameof(frequencyRange), $"Frequency range must be less than or equal to {maxFrequency}");
        }

        // An inverted range would yield decreasing bin indices and silently empty filters.
        if (range.Min > range.Max)
        {
            throw new ArgumentOutOfRangeException(nameof(frequencyRange), "Frequency range minimum must not exceed its maximum");
        }

        _samplesPerFrame = samplesPerFrame;
        _sampleRate = sampleRate;
        _lowerFilterFreq = range.Min;
        _upperFilterFreq = range.Max;

        // Must run after the fields above are assigned; the result is the same for every frame.
        _centerFrequencyIndices = GetCenterFrequencyIndicesForInitialization();
    }

    /// <summary>Number of values written by <see cref="Process"/> (one per mel filter).</summary>
    public int BufferSize => _melFiltersCount;

    /// <summary>
    /// Fills <paramref name="buffer"/> with one filter bank value per mel filter.
    /// </summary>
    /// <param name="magnitudeSpectrum">
    /// Spectrum values indexed by FFT bin; must contain every bin up to the upper edge of the last filter.
    /// </param>
    /// <param name="buffer">Destination for the results; expected to hold <see cref="BufferSize"/> elements.</param>
    /// <exception cref="ArgumentNullException">Either array is null.</exception>
    /// <exception cref="ArgumentException">Either array is too short for the configured filters.</exception>
    public void Process(double[] magnitudeSpectrum, double[] buffer)
    {
        if (magnitudeSpectrum == null)
        {
            throw new ArgumentNullException(nameof(magnitudeSpectrum));
        }

        if (buffer == null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }

        Debug.Assert(buffer.Length == _melFiltersCount);

        // Debug.Assert is compiled out of release builds, so check up front the sizes that would
        // otherwise fail part-way through with the buffer half written.
        if (buffer.Length < _melFiltersCount)
        {
            throw new ArgumentException($"Buffer must have at least {_melFiltersCount} elements", nameof(buffer));
        }

        int highestBin = _centerFrequencyIndices[_centerFrequencyIndices.Length - 1];
        if (magnitudeSpectrum.Length <= highestBin)
        {
            throw new ArgumentException($"Magnitude spectrum must have at least {highestBin + 1} elements", nameof(magnitudeSpectrum));
        }

        FillMelFilterBank(magnitudeSpectrum, buffer);
    }

    /// <summary>
    /// Applies every mel filter to the spectrum and stores the results in <paramref name="melFilterBank"/>.
    /// </summary>
    /// <param name="magnitudeSpectrum">Spectrum values indexed by FFT bin.</param>
    /// <param name="melFilterBank">Receives the value of filter <c>k</c> at index <c>k - 1</c>.</param>
    private void FillMelFilterBank(double[] magnitudeSpectrum, double[] melFilterBank)
    {
        for (int k = 1; k <= _melFiltersCount; k++)
        {
            int lowerBin = _centerFrequencyIndices[k - 1];
            int centerBin = _centerFrequencyIndices[k];
            int upperBin = _centerFrequencyIndices[k + 1];

            // Rising slope: weights grow linearly from the lower edge up to the center bin.
            double risingWidth = centerBin - lowerBin + 1;
            double risingSum = 0.0;
            for (int i = lowerBin; i <= centerBin; i++)
            {
                risingSum += magnitudeSpectrum[i] * (i - lowerBin + 1);
            }
            risingSum /= risingWidth;

            // Falling slope: weights shrink linearly from just above the center to the upper edge.
            double fallingWidth = upperBin - centerBin + 1;
            double fallingSum = 0.0;
            for (int i = centerBin + 1; i <= upperBin; i++)
            {
                fallingSum += magnitudeSpectrum[i] * (1 - ((i - centerBin) / fallingWidth));
            }

            melFilterBank[k - 1] = risingSum + fallingSum;
        }
    }

    /// <summary>
    /// Builds the FFT bin indices of the filter edges and centers.
    /// </summary>
    /// <returns>An array of <c>filter count + 2</c> bin indices, from lowest to highest frequency.</returns>
    private int[] GetCenterFrequencyIndicesForInitialization()
    {
        int[] indices = new int[_melFiltersCount + 2];

        // The mel bounds do not depend on the filter number, so compute them once.
        double melLow = FreqToMel(_lowerFilterFreq);
        double melHigh = FreqToMel(_upperFilterFreq);
        double melStep = (melHigh - melLow) / (_melFiltersCount + 1.0);

        indices[0] = ToBinIndex(_lowerFilterFreq);
        for (int i = 1; i <= _melFiltersCount; i++)
        {
            indices[i] = ToBinIndex(InverseMel(melLow + melStep * i));
        }

        // The last edge follows the configured upper frequency rather than always sitting on the
        // Nyquist bin, so the last filter does not reach beyond the requested range. When the
        // upper frequency is sampleRate / 2 this still resolves to samplesPerFrame / 2.
        indices[indices.Length - 1] = ToBinIndex(_upperFilterFreq);
        return indices;
    }

    /// <summary>
    /// Converts a frequency to the nearest FFT bin index, never exceeding <c>samplesPerFrame / 2</c>.
    /// </summary>
    private int ToBinIndex(double frequency)
    {
        int nyquistBin = _samplesPerFrame / 2;
        int nearestBin = (int)Math.Round(frequency / _sampleRate * _samplesPerFrame);
        return Math.Min(nearestBin, nyquistBin);
    }

    /// <summary>Converts a mel value back to a frequency (inverse of <see cref="FreqToMel"/>).</summary>
    private static double InverseMel(double x)
    {
        return 700.0 * (Math.Pow(10, x / 2595.0) - 1.0);
    }

    /// <summary>Converts a frequency to the mel scale using <c>2595 * log10(1 + f / 700)</c>.</summary>
    private static double FreqToMel(double freq)
    {
        return 2595.0 * Math.Log10(1 + freq / 700.0);
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment