-
-
Save lukehoban/0ee5c1bef438dc5bd7cb to your computer and use it in GitHub Desktop.
var fs = require('fs'); | |
var util = require('util'); | |
var request = require('request'); | |
var clientId = 'test-app'; // Can be anything | |
var clientSecret = 'f6f0bfec08274b8790520a9079b808af'; // API key from Azure marketplace | |
var str = 'This is a cool demo to call Microsoft text to speach service in Node.js.'; | |
console.log('Converting from text -> speech -> text.'); | |
console.log('Input text: "' + str + '"'); | |
getAccessToken(clientId, clientSecret, function(err, accessToken) { | |
if(err) return console.log(err); | |
console.log('Got access token: ' + accessToken) | |
textToSpeech(str, 'test.wav', accessToken, function(err) { | |
if(err) return console.log(err); | |
console.log('Wrote out: ' + 'test.wav'); | |
speechToText('test.wav', accessToken, function(err, res) { | |
if(err) return console.log(err); | |
console.log('Confidence ' + res.results[0].confidence + ' for: "' + res.results[0].lexical + '"'); | |
}); | |
}); | |
}) | |
// ==== Helpers ==== | |
function getAccessToken(clientId, clientSecret, callback) { | |
request.post({ | |
url: 'https://oxford-speech.cloudapp.net/token/issueToken', | |
form: { | |
'grant_type': 'client_credentials', | |
'client_id': encodeURIComponent(clientId), | |
'client_secret': encodeURIComponent(clientSecret), | |
'scope': 'https://speech.platform.bing.com' | |
} | |
}, function(err, resp, body) { | |
if(err) return callback(err); | |
try { | |
var accessToken = JSON.parse(body).access_token; | |
if(accessToken) { | |
callback(null, accessToken); | |
} else { | |
callback(body); | |
} | |
} catch(e) { | |
callback(e); | |
} | |
}); | |
} | |
function textToSpeech(text, filename, accessToken, callback) { | |
var ssmlTemplate = "<speak version='1.0' xml:lang='en-us'><voice xml:lang='%s' xml:gender='%s' name='%s'>%s</voice></speak>"; | |
request.post({ | |
url: 'http://speech.platform.bing.com/synthesize', | |
body: util.format(ssmlTemplate, 'en-US', 'Female', 'Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)', text), | |
encoding: null, | |
headers: { | |
'Authorization': 'Bearer ' + accessToken, | |
'Content-Type' : 'application/ssml+xml', | |
'X-Microsoft-OutputFormat' : 'riff-16khz-16bit-mono-pcm', | |
'X-Search-AppId': '07D3234E49CE426DAA29772419F436CA', | |
'X-Search-ClientID': '1ECFAE91408841A480F00935DC390960', | |
} | |
}, function(err, resp, body) { | |
if(err) return callback(err); | |
fs.writeFile(filename, body, 'binary', function (err) { | |
if (err) return callback(err); | |
callback(null); | |
}); | |
}); | |
} | |
function speechToText(filename, accessToken, callback) { | |
fs.readFile(filename, function(err, waveData) { | |
if(err) return callback(err); | |
request.post({ | |
url: 'https://speech.platform.bing.com/recognize/query', | |
qs: { | |
'scenarios': 'ulm', | |
'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5', // This magic value is required | |
'locale': 'en-US', | |
'device.os': 'wp7', | |
'version': '3.0', | |
'format': 'json', | |
'requestid': '1d4b6030-9099-11e0-91e4-0800200c9a66', // can be anything | |
'instanceid': '1d4b6030-9099-11e0-91e4-0800200c9a66' // can be anything | |
}, | |
body: waveData, | |
headers: { | |
'Authorization': 'Bearer ' + accessToken, | |
'Content-Type': 'audio/wav; samplerate=16000', | |
'Content-Length' : waveData.length | |
} | |
}, function(err, resp, body) { | |
if(err) return callback(err); | |
try { | |
callback(null, JSON.parse(body)); | |
} catch(e) { | |
callback(e); | |
} | |
}); | |
}); | |
} |
I am also getting service not available .Anyone found resolution?. same error with micorsoft's sample code as well.
I dont get the problem when I test it on Postman. But I get the problem everywhere else lol
@myadhdoutlet did you fix the issue with the static? I had the same issue and I fixed it adding encoding: null
to the request. This way you get the response as a Buffer. See https://github.com/palmerabollo/bingspeech-api-client/blob/master/src/client.ts#L102
@lukehoban From where did you get 'X-Search-AppId': '07D3234E49CE426DAA29772419F436CA' and 'X-Search-ClientID': '1ECFAE91408841A480F00935DC390960'?
Hi,
The voice recognition process works fine but only if my sentence contains at least 5 or 6 syllables. Is there any limitation that prevent shortest sentences to be recognized?
Thanks
From where did you get 'X-Search-AppId': '' and 'X-Search-ClientID': ''?
@miparnisari hello,From where did you get 'X-Search-AppId': '' and 'X-Search-ClientID': ''?
I receive the "Services Not Working" error if I use the http: endpoint instead of https:, I'm still not getting successful responses, but I'm not getting That particular error.