Last active
March 16, 2020 08:49
-
-
Save JeffBrand/93843d5583ce44cfc3319ed5f17e324b to your computer and use it in GitHub Desktop.
Azure Function for Transcribing Speech
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "Microsoft.WindowsAzure.Storage" | |
#r "Newtonsoft.Json" | |
using System.Net; | |
using Microsoft.CognitiveServices.SpeechRecognition; | |
/// <summary>
/// A single entry of a report. Entries whose <c>audioFile</c> is non-blank
/// are transcribed by <c>Run</c>, which stores the result in <c>audioText</c>.
/// </summary>
public class ReportItem
{
    /// <summary>Identifier of the entry; written to the log while it is processed.</summary>
    public string id { get; set; }

    /// <summary>Name of the blob holding the audio for this entry; blank means "no audio".</summary>
    public string audioFile { get; set; }

    /// <summary>Text produced by transcribing <c>audioFile</c>.</summary>
    public string audioText { get; set; }
}
/// <summary>
/// The document bound to the function's input: a titled collection of
/// <c>ReportItem</c> entries. After processing, the same instance is written
/// to the output collector by <c>Run</c>.
/// </summary>
public class Report
{
    /// <summary>Identifier of the report.</summary>
    public string id { get; set; }

    /// <summary>Title of the report.</summary>
    public string title { get; set; }

    /// <summary>Entries of the report; may be null or empty, in which case <c>Run</c> rejects the request.</summary>
    public List<ReportItem> items { get; set; }
}
/// <summary>
/// Function entry point. Transcribes the audio blob of every report item that
/// names one, stores the text on the item, and emits the updated report.
/// </summary>
/// <param name="report">The incoming report; may be null.</param>
/// <param name="outputDocument">Collector that receives the processed report.</param>
/// <param name="log">Function trace log.</param>
/// <returns>
/// 200 OK once the report has been added to <paramref name="outputDocument"/>;
/// 400 Bad Request when the report is null or has no items.
/// </returns>
public static async Task<HttpResponseMessage> Run(Report report, IAsyncCollector<object> outputDocument, TraceWriter log)
{
    log.Info("C# HTTP trigger function processing a request...");

    // Guard: a missing report, a missing item list and an empty list are all rejected.
    var itemCount = report?.items?.Count ?? 0;
    if (itemCount <= 0)
    {
        return new HttpResponseMessage(HttpStatusCode.BadRequest);
    }

    log.Info($"{itemCount} items found.");

    var account = Microsoft.WindowsAzure.Storage.CloudStorageAccount.Parse(AzureStorageAccount.ConnectionString);
    var container = account
        .CreateCloudBlobClient()
        .GetContainerReference(AzureStorageAccount.ContainerName);

    // Only items that actually reference an audio blob are transcribed;
    // they are handled one at a time, in list order.
    foreach (var entry in report.items.Where(i => !string.IsNullOrWhiteSpace(i.audioFile)))
    {
        log.Info($"Processing {entry.id}...");

        var transcriber = new AudioHandler();
        entry.audioText = await transcriber.ProcessBlob(container, entry.audioFile, log);

        log.Info($"[{entry.audioFile}] transcribed: {entry.audioText}");
    }

    await outputDocument.AddAsync(report);
    return new HttpResponseMessage(HttpStatusCode.OK);
}
/// <summary>
/// Holder for the storage settings read by <c>Run</c>. Declared abstract so it
/// cannot be instantiated; only its static members are used.
/// </summary>
public abstract class AzureStorageAccount
{
    /// <summary>
    /// Connection string handed to <c>CloudStorageAccount.Parse</c>.
    /// NOTE(review): the value is a placeholder template; real credentials
    /// should be supplied from configuration rather than committed to source.
    /// </summary>
    public static string ConnectionString =
        "DefaultEndpointsProtocol=https;AccountName=<your-account-name>;AccountKey=<your-account-key>";

    /// <summary>Name of the blob container that holds the audio files.</summary>
    public static string ContainerName = "upload";
}
/// <summary>
/// Downloads an audio blob, streams it to the speech recognition client and
/// exposes the first recognition response as a <see cref="Task{TResult}"/>.
/// </summary>
/// <remarks>
/// All instances share one static recognition client.
/// NOTE(review): because the client is shared, overlapping transcriptions
/// (e.g. concurrent function invocations) could receive each other's
/// responses — confirm the host never runs two at once, or give each
/// handler its own client.
/// </remarks>
public class AudioHandler
{
    /// <summary>Number of bytes sent to the recognition client per call.</summary>
    private const int ChunkSize = 1024;

    private static readonly DataRecognitionClient _dataClient;

    /// <summary>Completion source of the transcription currently in flight on this instance.</summary>
    private TaskCompletionSource<string> _tcs;

    static AudioHandler()
    {
        _dataClient = SpeechRecognitionServiceFactory.CreateDataClient(
            SpeechRecognitionMode.LongDictation,
            "en-US",
            "<your-api-key>");
    }

    /// <summary>
    /// Creates a handler. The response subscription is made per
    /// <see cref="ProcessBlob"/> call rather than here, so an instance that is
    /// never used leaves nothing attached to the shared client and an instance
    /// can be used for more than one blob.
    /// </summary>
    public AudioHandler()
    {
    }

    /// <summary>
    /// Receives a recognition response, detaches from the shared client and
    /// completes the pending transcription with the first result's display
    /// text, or with <c>"ERROR: Bad audio"</c> when there are no results.
    /// </summary>
    /// <remarks>
    /// NOTE(review): only the first response is consumed; if the service
    /// delivers several phrases per audio stream, later ones are ignored —
    /// confirm this is intended.
    /// </remarks>
    private void responseHandler(object sender, SpeechResponseEventArgs args)
    {
        // Detach first, and from the client we subscribed to, so the handler is
        // removed even if the sender is not the expected type.
        _dataClient.OnResponseReceived -= responseHandler;

        var results = args?.PhraseResponse?.Results;
        var text = (results == null || results.Length == 0)
            ? "ERROR: Bad audio"
            : results[0].DisplayText;

        // TrySetResult: a late duplicate response must not throw on an
        // already-completed task.
        _tcs?.TrySetResult(text);
    }

    /// <summary>
    /// Downloads <paramref name="blobName"/> from <paramref name="container"/>,
    /// sends its bytes to the recognition client and returns the transcription.
    /// </summary>
    /// <param name="container">Container that holds the audio blob.</param>
    /// <param name="blobName">Name of the block blob to transcribe.</param>
    /// <param name="log">Function trace log.</param>
    /// <returns>
    /// The display text of the first recognition result, or
    /// <c>"ERROR: Bad audio"</c> when the response carries no results.
    /// </returns>
    public async Task<string> ProcessBlob(Microsoft.WindowsAzure.Storage.Blob.CloudBlobContainer container, string blobName, TraceWriter log)
    {
        // Run continuations off the client's callback thread so the caller's
        // follow-up work does not execute inside the response event.
        var completion = new TaskCompletionSource<string>(TaskCreationOptions.RunContinuationsAsynchronously);
        _tcs = completion;

        using (var mem = new System.IO.MemoryStream())
        {
            log.Info("Ready to read blob");
            var blockBlob = container.GetBlockBlobReference(blobName);
            await blockBlob.DownloadToStreamAsync(mem);
            log.Info("Blob read - size=" + mem.Length);
            mem.Position = 0;

            // Subscribe only once there is audio to send; a failed download
            // therefore leaves no handler attached to the shared client.
            _dataClient.OnResponseReceived += responseHandler;
            try
            {
                var buffer = new byte[ChunkSize];
                try
                {
                    int bytesRead;
                    // Stop at end of stream without sending an empty chunk.
                    while ((bytesRead = mem.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        _dataClient.SendAudio(buffer, bytesRead);
                    }
                    log.Info("Done Reading bytes");
                }
                finally
                {
                    // Always close the audio stream on the client, even when sending failed.
                    _dataClient.EndAudio();
                    log.Info("Finished");
                }
            }
            catch
            {
                // No response will be awaited after a failure; detach so the
                // static event does not keep this instance alive.
                _dataClient.OnResponseReceived -= responseHandler;
                throw;
            }
        }

        log.Info("Returning");
        return await completion.Task;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment