Created
August 16, 2012 04:36
-
-
Save ogazitt/3366917 to your computer and use it in GitHub Desktop.
Encode a PCM byte array using NSpeex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// I call EncodeSpeech() with a byte array containing PCM-encoded audio data that I get from the
// microphone (with len being the number of PCM bytes in the buffer, in case this is the
// last chunk and the buffer is only partially filled).
// In my app I send the returned byte array to a service for processing - hence I prefix the
// encoded Speex data with length information.
// The reason that I prefix the data with both the original PCM buffer size as well as
// the size of the encoded chunk is that the DECODER needs to know the original buffer size,
// otherwise it does not decode properly. Of course the size of the encoded chunk is to "frame"
// the chunks for the service, which may read multiple chunks off a stream.
// The Microphone.Default instance; EncodeSpeech reads its SampleRate to select the Speex band mode.
private static Microphone mic = Microphone.Default; | |
/// <summary>
/// Encodes a buffer of 16-bit PCM audio with Speex (quality 1) and wraps the result in a
/// length-prefixed chunk.
/// </summary>
/// <param name="buf">Buffer holding the PCM data, read as consecutive 16-bit samples.</param>
/// <param name="len">
/// Number of valid bytes in <paramref name="buf"/>; may be smaller than the buffer when the
/// last chunk is only partially filled. A trailing odd byte is ignored.
/// </param>
/// <returns>
/// A chunk laid out as
/// | 4 bytes: encoded size + 4 | 4 bytes: number of samples encoded | encoded bytes |,
/// or <paramref name="buf"/> itself (unframed) when the encoder produced no bytes.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="buf"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="len"/> is negative or larger than the buffer.
/// </exception>
private static byte[] EncodeSpeech(byte[] buf, int len)
{
    if (buf == null)
    {
        throw new ArgumentNullException("buf");
    }
    if (len < 0 || len > buf.Length)
    {
        throw new ArgumentOutOfRangeException("len");
    }

    BandMode mode = GetBandMode(mic.SampleRate);
    SpeexEncoder encoder = new SpeexEncoder(mode);
    // set encoding quality to lowest (which will generate the smallest size in the fastest time)
    encoder.Quality = 1;

    // Convert the bytes to 16-bit samples. Iterating over whole samples (rather than over
    // bytes) keeps an odd len from indexing one element past the end of the sample array.
    int sampleCount = len / 2;
    short[] samples = new short[sampleCount];
    for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
    {
        samples[sampleIndex] = BitConverter.ToInt16(buf, sampleIndex * 2);
    }

    // note: the number of samples passed to the encoder must be a multiple of encoder.FrameSize,
    // so any trailing partial frame is dropped
    int frameAlignedCount = sampleCount - sampleCount % encoder.FrameSize;

    byte[] encodedData = new byte[len];
    int encodedBytes = encoder.Encode(samples, 0, frameAlignedCount, encodedData, 0, len);
    if (encodedBytes == 0)
    {
        // Nothing was encoded: hand the caller's buffer back untouched (no header is added).
        return buf;
    }

    // each chunk is laid out as follows:
    // | 4-byte size of the rest of the chunk | 4-byte encoded sample count | <encoded-bytes> |
    byte[] sampleCountField = BitConverter.GetBytes(frameAlignedCount);
    byte[] chunkSizeField = BitConverter.GetBytes(encodedBytes + sampleCountField.Length);
    int headerSize = chunkSizeField.Length + sampleCountField.Length;

    byte[] returnBuf = new byte[headerSize + encodedBytes];
    chunkSizeField.CopyTo(returnBuf, 0);
    sampleCountField.CopyTo(returnBuf, chunkSizeField.Length);
    Array.Copy(encodedData, 0, returnBuf, headerSize, encodedBytes);
    return returnBuf;
}
/// <summary>
/// Maps a sample rate to a Speex band mode: up to 8000 is Narrow, up to 16000 is Wide,
/// anything higher is UltraWide.
/// </summary>
/// <param name="sampleRate">Sample rate to classify.</param>
/// <returns>The band mode matching <paramref name="sampleRate"/>.</returns>
private static BandMode GetBandMode(int sampleRate)
{
    BandMode selected;
    if (sampleRate > 16000)
    {
        selected = BandMode.UltraWide;
    }
    else if (sampleRate > 8000)
    {
        selected = BandMode.Wide;
    }
    else
    {
        selected = BandMode.Narrow;
    }
    return selected;
}
Here's the code:
/// <summary>
/// Decodes one length-prefixed Speex chunk back into 16-bit PCM bytes.
/// </summary>
/// <param name="buf">
/// A chunk laid out as
/// | 4 bytes: encoded size + 4 | 4 bytes: number of samples | encoded bytes |
/// (the layout EncodeSpeech produces).
/// </param>
/// <returns>A byte array of (number of samples * 2) bytes holding the decoded samples.</returns>
/// <exception cref="ArgumentNullException"><paramref name="buf"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="buf"/> is shorter than the 8-byte header, or its header fields are
/// negative or describe more encoded bytes than the buffer contains.
/// </exception>
private byte[] DecodeSpeech(byte[] buf)
{
    const int HeaderSize = 8;
    const int SampleCountFieldSize = 4;

    if (buf == null)
    {
        throw new ArgumentNullException("buf");
    }
    if (buf.Length < HeaderSize)
    {
        throw new ArgumentException("Chunk is shorter than its 8-byte header.", "buf");
    }

    // Read both header fields straight from the chunk.
    int size = BitConverter.ToInt32(buf, 0);
    int inDataSize = BitConverter.ToInt32(buf, SampleCountFieldSize);

    // The first field counts the sample-count field plus the encoded payload, so the payload
    // length is size - 4. Using it (instead of "everything after the header") keeps any
    // bytes that follow this chunk out of the decoder.
    int encodedLength = size - SampleCountFieldSize;
    if (inDataSize < 0 || encodedLength < 0 || encodedLength > buf.Length - HeaderSize)
    {
        throw new ArgumentException("Chunk header is inconsistent with the buffer length.", "buf");
    }

    // NOTE(review): a new decoder is created for every chunk; if the codec carries state
    // between frames this may affect output across chunk boundaries - confirm against NSpeex.
    BandMode mode = GetBandMode(microphone.SampleRate);
    SpeexDecoder decoder = new SpeexDecoder(mode);

    byte[] encodedBuf = new byte[encodedLength];
    Array.Copy(buf, HeaderSize, encodedBuf, 0, encodedLength);

    short[] decodedBuf = new short[inDataSize];
    decoder.Decode(encodedBuf, 0, encodedBuf.Length, decodedBuf, 0, false);

    // Reinterpret the samples as raw bytes in the machine's native byte order, which is the
    // same order BitConverter.GetBytes would produce for each sample.
    byte[] returnBuf = new byte[inDataSize * 2];
    Buffer.BlockCopy(decodedBuf, 0, returnBuf, 0, returnBuf.Length);
    return returnBuf;
}
I am also facing issues while decoding. Can anyone share how to decode using Speex?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, I use SpeexDecoder to decode the data, but I cannot get back the same short[] data that I had before encoding. Do you have the same issue? Or could you give me a sample showing how to decode the data?