Last active
January 19, 2016 18:14
-
-
Save miduku/424c7a9471f95f1387f4 to your computer and use it in GitHub Desktop.
speech-to-text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Install SoX.
Create a data/ folder in the project directory.
Create config/default.json containing your API key under "key", e.g. { "key": "YOUR_API_KEY" }.
Run npm install.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var config = require('config'); | |
// thanks to: https://gist.github.com/lukehoban/0ee5c1bef438dc5bd7cb | |
var fs = require('fs'); | |
var util = require('util'); | |
var request = require('request'); | |
var clientId = 'test-app'; // Can be anything
var clientSecret = config.get('key'); // API key from Azure marketplace
var dirData = 'data/'; // directory where the audio and transcript files are read and saved
var locAudio = 'audio.wav'; // name of the audio file to transcribe, inside dirData
var locTxt = 'text.txt'; // name of the file the transcript is written to, inside dirData

// Fetch an access token, transcribe the audio file, then save the recognised
// text next to it. Every failure is logged and ends the flow.
getAccessToken(clientId, clientSecret, function(err, accessToken) {
  if (err) return console.log(err);
  console.log('Got access token: ' + accessToken);
  speechToText(dirData + locAudio, accessToken, function(err, res) {
    if (err) return console.log(err);
    // The response may carry no `results` entry; guard before dereferencing
    // so that case is reported instead of throwing a TypeError here.
    var best = res && res.results && res.results[0];
    if (!best) {
      return console.log('No recognition result returned: ' + JSON.stringify(res));
    }
    var resConfidence = best.confidence; // result of confidence
    var resLexical = best.lexical; // result of written text from audio
    console.log('Confidence ' + resConfidence + ' for: "' + resLexical + '"');
    fs.writeFile(dirData + locTxt, resLexical, function(err) {
      if (err) return console.log(err);
      console.log('Written text into file');
    });
  });
});
// getAccessToken(clientId, clientSecret, function(err, accessToken) { | |
// if(err) return console.log(err); | |
// console.log('Got access token: ' + accessToken); | |
// textToSpeech(str, dirData + locAudio, accessToken, function(err) { | |
// if(err) return console.log(err); | |
// console.log('Wrote out: ' + dirData + locAudio); | |
// speechToText(dirData + locAudio, accessToken, function(err, res) { | |
// if(err) return console.log(err); | |
// console.log('Confidence ' + res.results[0].confidence + ' for: "' + res.results[0].lexical + '"'); | |
// }); | |
// }); | |
// }); | |
// ==== Helpers ==== | |
/**
 * Requests an access token from the token service using the
 * `client_credentials` grant.
 *
 * @param {string} clientId - Value sent as `client_id`.
 * @param {string} clientSecret - Value sent as `client_secret`.
 * @param {function(?Error, string=)} callback - Called exactly once with
 *   `(null, accessToken)` on success, or with an error when the request
 *   fails, the body is not valid JSON, or the body has no `access_token`.
 *   In the last case the error also exposes the raw response as `error.body`.
 */
function getAccessToken(clientId, clientSecret, callback) {
  request.post({
    url: 'https://oxford-speech.cloudapp.net/token/issueToken',
    // `form` URL-encodes every value itself, so raw strings are passed here;
    // pre-encoding them would double-encode characters such as "+" or "/".
    form: {
      'grant_type': 'client_credentials',
      'client_id': clientId,
      'client_secret': clientSecret,
      'scope': 'https://speech.platform.bing.com'
    }
  }, function(err, resp, body) {
    if (err) return callback(err);
    var accessToken;
    // Only the parsing is guarded: invoking the callback inside the try would
    // make an exception thrown by the caller trigger a second callback call.
    try {
      accessToken = JSON.parse(body).access_token;
    } catch (e) {
      return callback(e);
    }
    if (!accessToken) {
      var failure = new Error('Token service did not return an access token: ' + body);
      failure.body = body;
      return callback(failure);
    }
    callback(null, accessToken);
  });
}
/**
 * Escapes the characters that are significant in XML so arbitrary text can
 * be embedded as element content.
 *
 * @param {*} value - Value to embed; converted with `String()` first.
 * @returns {string} The escaped text.
 */
function escapeXmlText(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

/**
 * Sends `text` to the synthesize endpoint wrapped in an SSML document and
 * writes the returned audio bytes to `filename`.
 *
 * @param {string} text - Plain text to speak; it is XML-escaped before being
 *   placed inside the SSML `<voice>` element.
 * @param {string} filename - Path of the file the response body is written to.
 * @param {string} accessToken - Token sent in the `Authorization` header.
 * @param {function(?Error)} callback - Called with `null` once the file is
 *   written, or with an error if the request fails, the response status is
 *   not 2xx, or the file cannot be written.
 */
function textToSpeech(text, filename, accessToken, callback) {
  var ssmlTemplate = "<speak version='1.0' xml:lang='en-us'><voice xml:lang='%s' xml:gender='%s' name='%s'>%s</voice></speak>";
  request.post({
    // HTTPS so the bearer token is not sent in clear text.
    url: 'https://speech.platform.bing.com/synthesize',
    body: util.format(ssmlTemplate, 'en-US', 'Female', 'Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)', escapeXmlText(text)),
    encoding: null, // keep the response body as a Buffer
    headers: {
      'Authorization': 'Bearer ' + accessToken,
      'Content-Type' : 'application/ssml+xml',
      'X-Microsoft-OutputFormat' : 'riff-16khz-16bit-mono-pcm',
      'X-Search-AppId': '07D3234E49CE426DAA29772419F436CA',
      'X-Search-ClientID': '1ECFAE91408841A480F00935DC390960',
    }
  }, function(err, resp, body) {
    if (err) return callback(err);
    // Do not save an error payload as if it were audio.
    if (resp && (resp.statusCode < 200 || resp.statusCode >= 300)) {
      return callback(new Error('Speech synthesis failed with HTTP status ' + resp.statusCode));
    }
    fs.writeFile(filename, body, 'binary', function (err) {
      if (err) return callback(err);
      callback(null);
    });
  });
}
/**
 * Reads the audio file at `filename`, posts its bytes to the recognize
 * endpoint and hands the parsed JSON response to `callback`.
 *
 * @param {string} filename - Path of the audio file to upload.
 * @param {string} accessToken - Token sent in the `Authorization` header.
 * @param {function(?Error, Object=)} callback - Called exactly once with
 *   `(null, parsedResponse)` on success, or with an error if the file cannot
 *   be read, the request fails, or the response body is not valid JSON.
 */
function speechToText(filename, accessToken, callback) {
  fs.readFile(filename, function(err, waveData) {
    if (err) return callback(err);
    request.post({
      url: 'https://speech.platform.bing.com/recognize/query',
      qs: {
        'scenarios': 'ulm',
        'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5', // This magic value is required
        'locale': 'en-US',
        'device.os': 'wp7',
        'version': '3.0',
        'format': 'json',
        'requestid': '1d4b6030-9099-11e0-91e4-0800200c9a66', // can be anything
        'instanceid': '1d4b6030-9099-11e0-91e4-0800200c9a66' // can be anything
      },
      body: waveData,
      headers: {
        'Authorization': 'Bearer ' + accessToken,
        'Content-Type': 'audio/wav; samplerate=16000',
        'Content-Length' : waveData.length
      }
    }, function(err, resp, body) {
      if (err) return callback(err);
      var parsed;
      // Only the parsing is guarded: invoking the callback inside the try
      // would turn an exception thrown by the caller into a second,
      // misleading callback invocation.
      try {
        parsed = JSON.parse(body);
      } catch (e) {
        return callback(e);
      }
      callback(null, parsed);
    });
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var rec = require('node-record-lpcm16'); | |
var fs = require('fs'); | |
// Record audio through node-record-lpcm16 and stream it into ./data/audio.wav,
// stopping automatically after ten seconds.
var file = fs.createWriteStream('./data/audio.wav', { encoding: 'binary' });
rec.start({
  // 16 kHz matches the `samplerate=16000` that the speech-to-text upload
  // declares in its Content-Type header for this same file.
  sampleRate: 16000,
  verbose: true
})
.pipe(file);
setTimeout(function () {
  rec.stop();
}, 10000); // stop after 10 seconds
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "speech-to-text", | |
"version": "1.0.0", | |
"description": "", | |
"main": "index.js", | |
"scripts": { | |
"test": "echo \"Error: no test specified\" && exit 1" | |
}, | |
"author": "", | |
"license": "ISC", | |
"dependencies": { | |
"config": "^1.19.0", | |
"node-record-lpcm16": "^0.1.4", | |
"request": "^2.67.0", | |
"util": "^0.10.3" | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment