Last active
January 7, 2020 22:05
-
-
Save dsherret/565fa14b4e417170918b1f8747fe8ad1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Adapted from: https://github.com/Sciss/SpeechRecognitionHMM/blob/master/src/main/java/org/ioe/tprsa/audio/feature/MFCC.java
using System; | |
using System.Diagnostics; | |
namespace CommonShared.Audio | |
{ | |
/// <summary>
/// Reduces an FFT spectrum to <see cref="BufferSize"/> values by summing it under
/// overlapping triangular filters whose edges are evenly spaced on the mel scale
/// between the lower and upper frequency of the configured range.
/// </summary>
/// <remarks>
/// Frequencies are mapped to spectrum indices as
/// <c>round(frequency / sampleRate * samplesPerFrame)</c>, i.e. index <c>i</c> is
/// treated as the frequency <c>i * sampleRate / samplesPerFrame</c>.
/// </remarks>
public class MelFrequencyBinner
{
    private const int _melFiltersCount = Constants.MEL_FILTERS_COUNT;

    private readonly double _sampleRate;
    private readonly double _upperFilterFreq;
    private readonly double _lowerFilterFreq;
    private readonly int _samplesPerFrame;

    // Spectrum index of every filter edge: [0] is the lower bound, [1.._melFiltersCount]
    // are the filter centers and the last entry is the upper bound.
    private readonly int[] _centerFrequencyIndices;

    /// <summary>
    /// Creates a binner for frames of <paramref name="samplesPerFrame"/> samples
    /// recorded at <paramref name="sampleRate"/>.
    /// </summary>
    /// <param name="samplesPerFrame">Number of samples per analysed frame; must be positive.</param>
    /// <param name="sampleRate">Sample rate of the signal; must be positive and finite.</param>
    /// <param name="frequencyRange">
    /// Frequency band covered by the filters. Falls back to
    /// <c>Constants.MEL_FREQUENCY_RANGE</c> when <c>null</c>. Its minimum must be at
    /// least 25 and its maximum at most <c>sampleRate / 2</c>.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// An argument is outside the ranges described above, or the range minimum is
    /// greater than its maximum.
    /// </exception>
    public MelFrequencyBinner(int samplesPerFrame, double sampleRate,
        FrequencyRange? frequencyRange = null)
    {
        if (samplesPerFrame <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(samplesPerFrame), "Samples per frame must be greater than zero");
        }

        // The negated comparison also rejects NaN.
        if (!(sampleRate > 0.0) || double.IsInfinity(sampleRate))
        {
            throw new ArgumentOutOfRangeException(nameof(sampleRate), "Sample rate must be a positive, finite number");
        }

        frequencyRange = frequencyRange ?? Constants.MEL_FREQUENCY_RANGE;

        const double minimumFrequency = 25.0;
        double maxFrequency = sampleRate / 2.0;

        if (frequencyRange.Value.Min < minimumFrequency)
        {
            throw new ArgumentOutOfRangeException(nameof(frequencyRange), $"Frequency range must be greater or equal to {minimumFrequency}");
        }

        if (frequencyRange.Value.Max > maxFrequency)
        {
            throw new ArgumentOutOfRangeException(nameof(frequencyRange), $"Frequency range must be less than or equal to {maxFrequency}");
        }

        // An inverted range would produce decreasing edge indices, which makes every
        // filter loop empty and silently yields an all-zero result.
        if (frequencyRange.Value.Min > frequencyRange.Value.Max)
        {
            throw new ArgumentOutOfRangeException(nameof(frequencyRange), "Frequency range minimum must be less than or equal to its maximum");
        }

        _samplesPerFrame = samplesPerFrame;
        _sampleRate = sampleRate;
        _lowerFilterFreq = frequencyRange.Value.Min;
        _upperFilterFreq = frequencyRange.Value.Max;

        // The edges depend only on the constructor arguments, so compute them once.
        _centerFrequencyIndices = GetCenterFrequencyIndicesForInitialization();
    }

    /// <summary>Number of values written by <see cref="Process"/>.</summary>
    public int BufferSize => _melFiltersCount;

    /// <summary>
    /// Applies the mel filter bank to <paramref name="magnitudeSpectrum"/> and writes
    /// one value per filter into the first <see cref="BufferSize"/> elements of
    /// <paramref name="buffer"/>.
    /// </summary>
    /// <param name="magnitudeSpectrum">
    /// Spectrum values indexed by FFT bin. Must be long enough to contain the bin of
    /// the upper frequency bound.
    /// </param>
    /// <param name="buffer">Destination; expected to hold exactly <see cref="BufferSize"/> elements.</param>
    /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">An array is too short to be processed.</exception>
    public void Process(double[] magnitudeSpectrum, double[] buffer)
    {
        if (magnitudeSpectrum == null)
        {
            throw new ArgumentNullException(nameof(magnitudeSpectrum));
        }

        if (buffer == null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }

        Debug.Assert(buffer.Length == _melFiltersCount);

        // Debug.Assert is compiled out of release builds, so reject the inputs that
        // would otherwise fail halfway through and leave the buffer partially written.
        if (buffer.Length < _melFiltersCount)
        {
            throw new ArgumentException($"Buffer must contain at least {_melFiltersCount} elements", nameof(buffer));
        }

        int highestIndex = _centerFrequencyIndices[_centerFrequencyIndices.Length - 1];
        if (magnitudeSpectrum.Length <= highestIndex)
        {
            throw new ArgumentException($"Spectrum must contain at least {highestIndex + 1} elements", nameof(magnitudeSpectrum));
        }

        FillMelFilterBank(magnitudeSpectrum, buffer);
    }

    /// <summary>
    /// Computes the weighted sum of the spectrum under each triangular filter.
    /// </summary>
    /// <param name="magnitudeSpectrum">
    /// Spectrum to filter (the original author's note describes it as the squared
    /// magnitude of the FFT).
    /// </param>
    /// <param name="melFilterBank">Receives the result for filter <c>k</c> at index <c>k - 1</c>.</param>
    private void FillMelFilterBank(double[] magnitudeSpectrum, double[] melFilterBank)
    {
        for (int k = 1; k <= _melFiltersCount; k++)
        {
            int left = _centerFrequencyIndices[k - 1];
            int center = _centerFrequencyIndices[k];
            int right = _centerFrequencyIndices[k + 1];

            // Rising slope: weights grow linearly from 1/width at `left` to 1 at `center`.
            double risingWidth = center - left + 1;
            double risingSum = 0.0;
            for (int i = left; i <= center; i++)
            {
                risingSum += magnitudeSpectrum[i] * (i - left + 1);
            }
            risingSum /= risingWidth;

            // Falling slope: weights shrink linearly from just below 1 after `center`
            // to 1/width at `right`. The width is a double, so the division is not truncated.
            double fallingWidth = right - center + 1;
            double fallingSum = 0.0;
            for (int i = center + 1; i <= right; i++)
            {
                fallingSum += magnitudeSpectrum[i] * (1 - ((i - center) / fallingWidth));
            }

            melFilterBank[k - 1] = risingSum + fallingSum;
        }
    }

    /// <summary>
    /// Builds the table of filter edge indices: lower bound, one center per filter,
    /// upper bound.
    /// </summary>
    private int[] GetCenterFrequencyIndicesForInitialization()
    {
        int[] indices = new int[_melFiltersCount + 2];

        // The mel-scale bounds and spacing are the same for every filter.
        double melLow = FreqToMel(_lowerFilterFreq);
        double melHigh = FreqToMel(_upperFilterFreq);
        double melStep = (melHigh - melLow) / (_melFiltersCount + 1.0);

        indices[0] = FrequencyToIndex(_lowerFilterFreq);
        for (int i = 1; i <= _melFiltersCount; i++)
        {
            indices[i] = FrequencyToIndex(InverseMel(melLow + melStep * i));
        }

        // Use the configured upper bound rather than always the Nyquist bin, so a
        // range that stops below Nyquist does not pull higher frequencies into the
        // last filter.
        indices[indices.Length - 1] = FrequencyToIndex(_upperFilterFreq);

        return indices;
    }

    /// <summary>
    /// Maps a frequency to the nearest spectrum index, never exceeding the Nyquist
    /// index <c>_samplesPerFrame / 2</c>.
    /// </summary>
    private int FrequencyToIndex(double frequency)
    {
        int index = (int)Math.Round(frequency / _sampleRate * _samplesPerFrame);

        // Math.Round can land one above the integer Nyquist index for odd frame sizes.
        return Math.Min(index, _samplesPerFrame / 2);
    }

    /// <summary>Converts a mel value back to a frequency (inverse of <see cref="FreqToMel"/>).</summary>
    private static double InverseMel(double x)
    {
        return 700.0 * (Math.Pow(10, x / 2595.0) - 1.0);
    }

    /// <summary>Converts a frequency to mel using <c>2595 * log10(1 + f / 700)</c>.</summary>
    private static double FreqToMel(double freq)
    {
        return 2595.0 * Math.Log10(1 + freq / 700.0);
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment