-
-
Save nfriedly/0240e862901474a9447a600e5795d500 to your computer and use it in GitHub Desktop.
// Note: The official .net SDK is in progress. It doesn't support streaming Speech to Text at the time of writing, | |
// but it will soon. Please check it out before using this code. | |
// | |
// https://github.com/watson-developer-cloud/dotnet-standard-sdk | |
using System; | |
using System.Net.WebSockets; | |
using System.Net; | |
using System.Runtime.Serialization.Json; | |
using System.Threading; | |
using System.Threading.Tasks; | |
using System.Text; | |
using System.IO; | |
using System.Runtime.Serialization; | |
// Perform streaming transcription of an audio file using the IBM Watson Speech to Text service over a websocket | |
// http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/speech-to-text.html | |
// https://msdn.microsoft.com/en-us/library/system.net.websockets.clientwebsocket%28v=vs.110%29.aspx | |
namespace WatsonSTTWebsocketExample | |
{ | |
class Program | |
{ | |
// Program entry point: runs the transcription, then keeps the console window
// open until the user submits a line of input.
static void Main(string[] args)
{
    Transcribe();

    Console.Out.WriteLine("Press any key to exit");
    Console.In.ReadLine();
}
// Service credentials; see
// http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/doc/getting_started/gs-credentials.shtml
static string username = "<username>";
static string password = "<password>";

// Audio file that is streamed to the service.
static string file = @"c:\audio.wav";

// Websocket endpoint of the recognize API.
static Uri url = new Uri("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize");

// The control messages below should probably be private classes that use DataContractJsonSerializer,
// see https://msdn.microsoft.com/en-us/library/bb412179%28v=vs.110%29.aspx
// or the ServiceState class at the end.
static ArraySegment<byte> openingMessage = Utf8Segment(
    "{\"action\": \"start\", \"content-type\": \"audio/wav\", \"continuous\" : true, \"interim_results\": true}");

static ArraySegment<byte> closingMessage = Utf8Segment(
    "{\"action\": \"stop\"}");

// Encodes a text payload as UTF-8 and wraps the bytes in a segment ready for SendAsync.
static ArraySegment<byte> Utf8Segment(string text)
{
    byte[] encoded = Encoding.UTF8.GetBytes(text);
    return new ArraySegment<byte>(encoded);
}
// Connects to the speech-to-text websocket, streams the audio file and prints every
// message the service sends back. Blocks the calling thread until the exchange is done.
static void Transcribe()
{
    // ClientWebSocket is IDisposable; the using block releases it even if a step below throws.
    using (var ws = new ClientWebSocket())
    {
        ws.Options.Credentials = new NetworkCredential(username, password);
        ws.ConnectAsync(url, CancellationToken.None).Wait();

        // send the opening message and wait for the initial delimiter
        Task.WaitAll(ws.SendAsync(openingMessage, WebSocketMessageType.Text, true, CancellationToken.None), HandleResults(ws));

        // send all audio and then a closing message; simultaneously print all results until the delimiter is received
        Task.WaitAll(SendAudio(ws), HandleResults(ws));

        // HandleResults returns early when the server sends a Close frame and may close the
        // socket itself when a message is too long, so only start the close handshake while
        // the socket is still in a state where CloseAsync is valid.
        if (ws.State == WebSocketState.Open || ws.State == WebSocketState.CloseReceived)
        {
            ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "Close", CancellationToken.None).Wait();
        }
    }
}
// Streams the audio file to the service in binary messages of up to 1024 bytes each,
// then sends the closing ("stop") message.
static async Task SendAudio(ClientWebSocket ws)
{
    using (FileStream fs = File.OpenRead(file))
    {
        byte[] chunk = new byte[1024];
        int bytesRead;
        while ((bytesRead = fs.Read(chunk, 0, chunk.Length)) > 0)
        {
            // Send only the bytes actually read: the last read is usually shorter than the
            // buffer, and the remainder still holds stale data from the previous chunk.
            await ws.SendAsync(new ArraySegment<byte>(chunk, 0, bytesRead), WebSocketMessageType.Binary, true, CancellationToken.None);
        }
        await ws.SendAsync(closingMessage, WebSocketMessageType.Text, true, CancellationToken.None);
    }
}
// Prints every message received from the service until the connection closes or a
// delimiter message (see IsDelimeter) is received.
//
// Messages may arrive in several fragments; the fragments are collected until EndOfMessage
// is set, so a message is no longer limited to the size of the receive buffer. A message
// larger than maxMessageBytes still closes the socket, as an upper bound on memory use.
static async Task HandleResults(ClientWebSocket ws)
{
    const int maxMessageBytes = 1024 * 1024;
    var chunk = new byte[1024];
    var segment = new ArraySegment<byte>(chunk);

    while (true)
    {
        string message;
        using (var assembled = new MemoryStream())
        {
            WebSocketReceiveResult result;
            do
            {
                result = await ws.ReceiveAsync(segment, CancellationToken.None);
                if (result.MessageType == WebSocketMessageType.Close)
                {
                    // The server is closing the connection; nothing more to print.
                    return;
                }

                assembled.Write(chunk, 0, result.Count);
                if (assembled.Length > maxMessageBytes)
                {
                    await ws.CloseAsync(WebSocketCloseStatus.InvalidPayloadData, "That's too long", CancellationToken.None);
                    return;
                }
            } while (!result.EndOfMessage);

            // Decode only once the whole message is available so that multi-byte UTF-8
            // sequences split across fragments are decoded correctly.
            message = Encoding.UTF8.GetString(assembled.ToArray());
        }

        // you'll probably want to parse the JSON into a useful object here,
        // see ServiceState and IsDelimeter for a light-weight example of that.
        Console.WriteLine(message);
        if (IsDelimeter(message))
        {
            return;
        }
    }
}
// The Watson service sends a {"state": "listening"} message at both the beginning and the
// *end* of the results. ServiceState is the data contract used to read that message.
[DataContract]
internal class ServiceState
{
    [DataMember]
    public string state = "";
}

// Returns true when the given JSON text is the {"state": "listening"} delimiter message,
// false otherwise (including for null or empty input and for a JSON null payload).
static bool IsDelimeter(String json)
{
    // Nothing to parse, so it cannot be the delimiter; also avoids the exception
    // Encoding.GetBytes throws when handed null.
    if (String.IsNullOrEmpty(json))
    {
        return false;
    }

    using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
    {
        DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(ServiceState));
        ServiceState obj = ser.ReadObject(stream) as ServiceState;

        // ReadObject can hand back null (e.g. for a JSON null payload), so guard before reading state.
        return obj != null && obj.state == "listening";
    }
}
} | |
} |
I'm running into a snag on line 51
Hi,
I'm running into the same problem right now. Did you find a solution for this?
Thanks,
Gianmarco
[UPDATE] I realized that there was a mistake with my credentials. Now it works fine.
Thank you
Hi ,
I based my example on this one; however, after extending the Watson parameters (which shouldn't matter), I keep getting this exception: "There is already one outstanding 'SendAsync' call for this WebSocket instance. ReceiveAsync and SendAsync can be called simultaneously, but at most one outstanding operation for each of them is allowed at the same time." This then results in WebSocketState = Aborted. Has anyone come across this?
thanks,
Kristina
How do I pass a parameter to specify the language? I intend to use it with Portuguese.
It appears WebSocket is not supported by Windows 7 as seen in this issue 5andr0/PogoLocationFeeder#229
Would you happen to know of an alternative for Windows 7 users? This otherwise looks like a fantastic example
Hello Everyone,
I completed the "WAV to text" integration successfully using C# and an IAM key. However, now I want to do the same from a live microphone audio stream to text.
Please advise.
Regards,
Palak
Is this method still the recommended one for STT over websocket? Thanks.
Hi there,
Your sample here looks perfect for what I'm trying to do; all I want is to test out Watson's Speech-to-Text functionality. However, I'm running into a snag on line 51: I keep getting a 500 error. I've made sure the URI is the correct one, etc. I'm wondering if you have any insight into this? Maybe their system isn't working right now?
Thanks,
Scott