Created
January 24, 2022 02:50
-
-
Save rkoshy/1a515e6849f8dac791e6da1eee4d6170 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.ioe.tprsa.audio.preProcessings; | |
/**
 * Removes silent stretches from a speech signal.
 *
 * <p>The leading part of the signal (up to 200 ms) is treated as background
 * noise and used to estimate a mean and a standard deviation. Every sample
 * whose one-dimensional Mahalanobis distance {@code |x - mean| / sd} from that
 * noise model exceeds a fixed threshold is marked voiced. The signal is then
 * cut into 1 ms frames, and only frames in which voiced samples form a strict
 * majority are kept.
 *
 * @author Ganesh Tiwari
 * @reference 'A New Silence Removal and Endpoint Detection Algorithm
 *            for Speech and Speaker Recognition Applications' by IIT, Kharagpur
 */
public class EndPointDetection {

    /** Number of frames per second; with 1000 each frame spans 1 ms. */
    private static final int FRAMES_PER_SECOND = 1000;

    /** Number of leading frames (200 ms) used to model the background noise. */
    private static final int NOISE_WINDOW_FRAMES = 200;

    /** A sample is voiced when {@code |x - mean| / sd} is strictly above this value. */
    private static final double VOICED_THRESHOLD = 0.3;

    private final float[] originalSignal; // input
    private float[] silenceRemovedSignal; // output of the last doEndPointDetection() call
    private final int samplingRate;
    private final int firstSamples;       // size of the noise-estimation window, in samples
    private final int samplePerFrame;

    /**
     * Creates a detector for the given signal.
     *
     * @param originalSignal the samples to process; the array is read, never modified
     * @param samplingRate   sampling rate of {@code originalSignal} in Hz
     * @throws NullPointerException     if {@code originalSignal} is {@code null}
     * @throws IllegalArgumentException if {@code samplingRate} is not positive
     */
    public EndPointDetection(float[] originalSignal, int samplingRate) {
        if (originalSignal == null) {
            throw new NullPointerException("originalSignal");
        }
        if (samplingRate <= 0) {
            throw new IllegalArgumentException("samplingRate must be positive: " + samplingRate);
        }
        this.originalSignal = originalSignal;
        this.samplingRate = samplingRate;
        // Rates below 1 kHz would give a zero-length frame and a division by
        // zero later on, so a frame is never shorter than one sample.
        this.samplePerFrame = Math.max(1, samplingRate / FRAMES_PER_SECOND);
        this.firstSamples = samplePerFrame * NOISE_WINDOW_FRAMES;
    }

    /**
     * Runs end-point detection and returns the signal with silent frames removed.
     *
     * <p>Samples after the last complete frame are dropped. If the signal is
     * shorter than the 200 ms noise window, the whole signal is used to
     * estimate the noise statistics.
     *
     * @return a new array holding the voiced frames in their original order;
     *         its length is a multiple of the frame size and is 0 when no
     *         frame is voiced or the input is empty
     */
    public float[] doEndPointDetection() {
        final int length = originalSignal.length;
        // Never read past the end of a signal shorter than the noise window.
        final int noiseSamples = Math.min(firstSamples, length);
        if (noiseSamples == 0) {
            silenceRemovedSignal = new float[0];
            return silenceRemovedSignal;
        }

        // 1. + 2. mean and standard deviation of the assumed-noise prefix
        final double mean = mean(noiseSamples);
        final double sd = standardDeviation(noiseSamples, mean);

        // 3. + 4. classify every sample and vote per frame; the integer
        // division drops a trailing partial frame
        final int frameCount = length / samplePerFrame;
        final boolean[] voicedFrame = new boolean[frameCount];
        int usefulFramesCount = 0;
        for (int frame = 0; frame < frameCount; frame++) {
            final int start = frame * samplePerFrame;
            int voicedSamples = 0;
            for (int j = start; j < start + samplePerFrame; j++) {
                if (isVoiced(originalSignal[j], mean, sd)) {
                    voicedSamples++;
                }
            }
            // strict majority: a tie counts as silence
            if (voicedSamples > samplePerFrame - voicedSamples) {
                voicedFrame[frame] = true;
                usefulFramesCount++;
            }
        }

        // 5. silence removal: copy the voiced frames back to back
        final float[] result = new float[usefulFramesCount * samplePerFrame];
        int writePos = 0;
        for (int frame = 0; frame < frameCount; frame++) {
            if (voicedFrame[frame]) {
                System.arraycopy(originalSignal, frame * samplePerFrame, result, writePos, samplePerFrame);
                writePos += samplePerFrame;
            }
        }
        silenceRemovedSignal = result;
        return silenceRemovedSignal;
    }

    /** Returns the arithmetic mean of the first {@code count} samples. */
    private double mean(int count) {
        double sum = 0.0;
        for (int i = 0; i < count; i++) {
            sum += originalSignal[i];
        }
        return sum / count;
    }

    /** Returns the population standard deviation of the first {@code count} samples. */
    private double standardDeviation(int count, double mean) {
        double squaredDeviations = 0.0;
        for (int i = 0; i < count; i++) {
            final double deviation = originalSignal[i] - mean;
            squaredDeviations += deviation * deviation;
        }
        return Math.sqrt(squaredDeviations / count);
    }

    /**
     * Tells whether a sample lies further from the noise mean than the threshold allows.
     * With {@code sd == 0}, a sample equal to the mean gives NaN (not voiced) and any
     * other sample gives infinity (voiced).
     */
    private static boolean isVoiced(float sample, double mean, double sd) {
        return Math.abs(sample - mean) / sd > VOICED_THRESHOLD;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment