ogazitt · August 16, 2012 04:36 · tianj · Sep 6, 2012 · tianj · Sep 6, 2012
diff --git a/gistfile1.txt b/gistfile1.txt
 // I call EncodeSpeech() with a byte array containing PCM-encoded audio data that I get from the 
 // microphone (with len being the number of encoded bytes in the buffer, in case this is the 
 // last chunk and the buffer is only partially filled).  
 // In my app I send the returned byte array to a service for processing - hence I prefix the
 // encoded Speex data with length information.
 // The reason that I prefix the data with both the original PCM buffer size as well as the
 // size of the encoded chunk is that the DECODER needs to know the original buffer size,
 // otherwise it does not decode properly.  Of course the size of the encoded chunk is to "frame"
 // the chunks for the service, which may read multiple chunks off a stream.

        // Default microphone; EncodeSpeech reads its SampleRate to choose the Speex band mode.
        private static Microphone mic = Microphone.Default;

        /// <summary>
        /// Speex-encodes a buffer of 16-bit PCM audio and prefixes the result with framing information.
        /// </summary>
        /// <param name="buf">Buffer holding the PCM bytes; every two bytes are read as one 16-bit sample.</param>
        /// <param name="len">Number of valid bytes in <paramref name="buf"/>. A trailing odd byte is ignored.</param>
        /// <returns>
        /// A new array laid out as
        /// | 4-byte payload size | 4-byte sample count | encoded bytes |,
        /// or <paramref name="buf"/> itself, unmodified, when the encoder produced no bytes.
        /// </returns>
        private static byte[] EncodeSpeech(byte[] buf, int len)
        {
            BandMode mode = GetBandMode(mic.SampleRate);
            SpeexEncoder encoder = new SpeexEncoder(mode);

            // set encoding quality to lowest (which will generate the smallest size in the fastest time)
            encoder.Quality = 1;

            // two bytes per 16-bit sample; integer division drops a trailing odd byte
            int inDataSize = len / 2;

            // convert to short array; the loop is bounded by the sample count so that an odd
            // len can never index one element past the end of data
            short[] data = new short[inDataSize];
            for (int sampleIndex = 0; sampleIndex < inDataSize; sampleIndex++)
            {
                data[sampleIndex] = BitConverter.ToInt16(buf, sampleIndex * 2);
            }

            // note: the number of samples handed to the encoder must be a multiple of encoder.FrameSize,
            // so any samples beyond the last whole frame are not encoded
            inDataSize = inDataSize - inDataSize % encoder.FrameSize;

            var encodedData = new byte[len];
            int encodedBytes = encoder.Encode(data, 0, inDataSize, encodedData, 0, len);
            if (encodedBytes != 0)
            {
                // each chunk is laid out as follows:
                // | 4-byte size of the rest of the chunk (sample-count field + encoded bytes) |
                // | 4-byte count of 16-bit samples that were encoded | <encoded-bytes> |
                byte[] inDataSizeBuf = BitConverter.GetBytes(inDataSize);
                byte[] sizeBuf = BitConverter.GetBytes(encodedBytes + inDataSizeBuf.Length);
                byte[] returnBuf = new byte[encodedBytes + sizeBuf.Length + inDataSizeBuf.Length];
                sizeBuf.CopyTo(returnBuf, 0);
                inDataSizeBuf.CopyTo(returnBuf, sizeBuf.Length);
                Array.Copy(encodedData, 0, returnBuf, sizeBuf.Length + inDataSizeBuf.Length, encodedBytes);
                return returnBuf;
            }

            // NOTE(review): when nothing was encoded the raw input buffer is returned without any
            // length prefix; kept for backward compatibility - confirm callers expect this.
            return buf;
        }

        /// <summary>
        /// Chooses the Speex band mode that corresponds to a sample rate.
        /// </summary>
        /// <param name="sampleRate">Sample rate of the audio to be encoded.</param>
        /// <returns>
        /// Narrow for rates up to 8000, Wide for rates up to 16000, UltraWide for anything higher.
        /// </returns>
        private static BandMode GetBandMode(int sampleRate)
        {
            if (sampleRate > 16000)
            {
                return BandMode.UltraWide;
            }

            return sampleRate > 8000 ? BandMode.Wide : BandMode.Narrow;
        }
	// I call EncodeSpeech() with a byte array containing PCM-encoded audio data that I get from the
	// microphone (with len being the number of encoded bytes in the buffer, in case this is the
	// last chunk and the buffer is only partially filled).
	// In my app I send the returned byte array to a service for processing - hence I prefix the
	// encoded Speex data with length information.
	// The reason that I prefix the data with both the original PCM buffer size as well as the
	// size of the encoded chunk is that the DECODER needs to know the original buffer size,
	// otherwise it does not decode properly. Of course the size of the encoded chunk is to "frame"
	// the chunks for the service, which may read multiple chunks off a stream.

	// Default microphone; EncodeSpeech reads its SampleRate to choose the Speex band mode.
	private static Microphone mic = Microphone.Default;

	/// <summary>
	/// Speex-encodes a buffer of 16-bit PCM audio and prefixes the result with framing information.
	/// </summary>
	/// <param name="buf">Buffer holding the PCM bytes; every two bytes are read as one 16-bit sample.</param>
	/// <param name="len">Number of valid bytes in <paramref name="buf"/>. A trailing odd byte is ignored.</param>
	/// <returns>
	/// A new array laid out as
	/// | 4-byte payload size | 4-byte sample count | encoded bytes |,
	/// or <paramref name="buf"/> itself, unmodified, when the encoder produced no bytes.
	/// </returns>
	private static byte[] EncodeSpeech(byte[] buf, int len)
	{
		BandMode mode = GetBandMode(mic.SampleRate);
		SpeexEncoder encoder = new SpeexEncoder(mode);

		// set encoding quality to lowest (which will generate the smallest size in the fastest time)
		encoder.Quality = 1;

		// two bytes per 16-bit sample; integer division drops a trailing odd byte
		int inDataSize = len / 2;

		// convert to short array; the loop is bounded by the sample count so that an odd
		// len can never index one element past the end of data
		short[] data = new short[inDataSize];
		for (int sampleIndex = 0; sampleIndex < inDataSize; sampleIndex++)
		{
			data[sampleIndex] = BitConverter.ToInt16(buf, sampleIndex * 2);
		}

		// note: the number of samples handed to the encoder must be a multiple of encoder.FrameSize,
		// so any samples beyond the last whole frame are not encoded
		inDataSize = inDataSize - inDataSize % encoder.FrameSize;

		var encodedData = new byte[len];
		int encodedBytes = encoder.Encode(data, 0, inDataSize, encodedData, 0, len);
		if (encodedBytes != 0)
		{
			// each chunk is laid out as follows:
			// | 4-byte size of the rest of the chunk (sample-count field + encoded bytes) |
			// | 4-byte count of 16-bit samples that were encoded | <encoded-bytes> |
			byte[] inDataSizeBuf = BitConverter.GetBytes(inDataSize);
			byte[] sizeBuf = BitConverter.GetBytes(encodedBytes + inDataSizeBuf.Length);
			byte[] returnBuf = new byte[encodedBytes + sizeBuf.Length + inDataSizeBuf.Length];
			sizeBuf.CopyTo(returnBuf, 0);
			inDataSizeBuf.CopyTo(returnBuf, sizeBuf.Length);
			Array.Copy(encodedData, 0, returnBuf, sizeBuf.Length + inDataSizeBuf.Length, encodedBytes);
			return returnBuf;
		}

		// NOTE(review): when nothing was encoded the raw input buffer is returned without any
		// length prefix; kept for backward compatibility - confirm callers expect this.
		return buf;
	}

	/// <summary>
	/// Chooses the Speex band mode that corresponds to a sample rate.
	/// </summary>
	/// <param name="sampleRate">Sample rate of the audio to be encoded.</param>
	/// <returns>
	/// Narrow for rates up to 8000, Wide for rates up to 16000, UltraWide for anything higher.
	/// </returns>
	private static BandMode GetBandMode(int sampleRate)
	{
		if (sampleRate > 16000)
		{
			return BandMode.UltraWide;
		}

		return sampleRate > 8000 ? BandMode.Wide : BandMode.Narrow;
	}
No results found