Last active
January 14, 2023 11:04
-
-
Save heaversm/447e0ec7900cb235ee7ede588818b6cb to your computer and use it in GitHub Desktop.
Twilio Call Recording Transcriptions With Google Web Speech API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//node vars
const express = require('express')
const twilio = require('twilio')
const request = require('request')

//twilio vars
// Credentials are read from the environment when set, so real secrets never
// have to be committed; the original '###' placeholders remain as fallbacks.
const accountSid = process.env.TWILIO_ACCOUNT_SID || '###' //your twilio account SID
const authToken = process.env.TWILIO_AUTH_TOKEN || '###' //your twilio auth token
// Reuse the module required above rather than calling require('twilio') twice.
const client = twilio(accountSid, authToken)
// Recording URLs are formed as baseURL + <recording SID> + '.wav'.
// Derived from accountSid so the account id is configured in exactly one place.
const baseURL = `https://api.twilio.com/2010-04-01/Accounts/${accountSid}/Recordings/`

//cloud speech vars
const Speech = require('@google-cloud/speech')
const projectId = 'twilio-translated'
const speechClient = Speech({ projectId })
// Recognition settings for Google Cloud Speech.
// NOTE(review): LINEAR16 @ 8000 Hz presumably matches Twilio's .wav recordings — confirm.
const speech_options = {
  encoding: 'LINEAR16',
  sampleRateHertz: 8000,
  languageCode: 'en-US',
  verbose: true,
}

// The Express application; never reassigned, so declared const.
const app = express()
//recording the call | |
// Voice webhook: replies to an incoming call with TwiML that speaks a prompt,
// records the caller's message, and then hangs up.
// In your twilio console, point the incoming-call webhook at this route (e.g. https://yourserver.com/record).
// NOTE(review): TwimlResponse appears to be the twilio-node 2.x helper; newer
// releases expose twilio.twiml.VoiceResponse instead — confirm the installed version.
app.post('/record', (req, res) => {
  const voiceResponse = new twilio.TwimlResponse()

  //robot-voice prompt read to the caller
  voiceResponse.say('Hello. Please leave your message. Press any button to end recording.')

  //<Record> captures (and, here, also transcribes) the caller's message
  const recordSettings = {
    //twilio transcription is kept so it can be compared with the google one; omit or set to false if you don't need it
    transcribe: true,
    //seconds to record
    maxLength: 30,
  }
  voiceResponse.record(recordSettings)

  //<Hangup> ends the call once recording is done
  voiceResponse.hangup()

  //reply to the webhook with the TwiML rendered as XML
  const xml = voiceResponse.toString()
  res.type('text/xml')
  res.send(xml)
})
// GET /list — returns { recordings: [{ recordingSid }], transcriptions: [{ transcriptionText, recordingSid }] }.
// Twilio recordings and their transcriptions come from two separate API calls,
// so both are collected and sent back to the front end in a single response.
app.get('/list', (req, res) => {
  const responseData = {
    recordings: [],
    transcriptions: [],
  }
  // Completion is tracked explicitly. Inferring "done" from array length would
  // leave the request hanging whenever either list is legitimately empty.
  let recordingsLoaded = false
  let transcriptionsLoaded = false
  let hasSentData = false //guards against answering the same request twice

  // Reply with a 500 the first time either Twilio call reports an error.
  const fail = (err) => {
    if (hasSentData) {
      return
    }
    hasSentData = true
    console.log(err)
    res.status(500).send({ error: 'Unable to load recordings from Twilio' })
  }

  // Send the combined payload once both lists have arrived.
  const sendWhenComplete = () => {
    if (hasSentData || !recordingsLoaded || !transcriptionsLoaded) {
      return
    }
    hasSentData = true
    res.send(responseData)
  }

  client.recordings.list((err, data) => {
    if (err) {
      fail(err)
      return
    }
    data.forEach((recording) => {
      responseData.recordings.push({ recordingSid: recording.sid })
    })
    recordingsLoaded = true
    sendWhenComplete()
  })

  client.transcriptions.list((err, data) => {
    if (err) {
      fail(err)
      return
    }
    data.forEach((transcription) => {
      //only return recordings with an actual twilio transcription
      if (transcription.status !== 'failed') {
        responseData.transcriptions.push({
          transcriptionText: transcription.transcriptionText,
          recordingSid: transcription.recordingSid,
        })
      }
    })
    transcriptionsLoaded = true
    sendWhenComplete()
  })
})
// GET /transcribe?sid=<recording SID> — runs the Twilio recording through the
// Google speech client and responds with the transcript as plain text.
// Responds 400 when sid is missing and 500 when recognition fails.
app.get('/transcribe', (req, res) => {
  const sid = req.query.sid
  // A repeated ?sid= parameter arrives as an array; only a non-empty string is usable.
  if (typeof sid !== 'string' || sid.length === 0) {
    res.status(400).send('Missing required query parameter: sid')
    return
  }

  // Encode the SID so a crafted value cannot alter the path of the recording URL.
  const directURL = baseURL + encodeURIComponent(sid) + '.wav'

  const transcribeCallback = (err, transcript) => {
    if (err) {
      console.log(err)
      // Always answer, otherwise the client request hangs until it times out.
      res.status(500).send('Transcription failed')
      return
    }
    // Each result item exposes its text as `transcript`; join them into one string.
    const transcriptText = transcript.map((item) => item.transcript).join(" ")
    res.send(transcriptText)
  }

  // Uses the module-level speech_options rather than an identical local copy.
  speechClient.recognize(directURL, speech_options, transcribeCallback)
})
//I am having express serve my static front end app, your application might be different
app.use(express.static('public_html'))

// The original `[YOUR_PORT]` placeholder is an array literal referencing an
// undefined identifier and throws a ReferenceError at startup. Read the port
// from the environment instead, falling back to 3000 for local development.
const port = process.env.PORT || 3000

//run the server and listen for all incoming requests
app.listen(port, function () {
  console.log('listening')
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Fetches the recordings/transcriptions payload from /list, copies each
// matching recording's recordingURL onto its transcription, stores the
// transcriptions as the global playlist (and its length in len), then starts
// loading audio. Nothing is loaded when the playlist is empty.
loadFiles = function () {
  $.ajax({
    method: "GET",
    url: "/list"
  }).done(function (data) {
    //associate calls with transcriptions (this should probably be done on the backend, really)
    // Index recordings by SID once instead of scanning the whole list for every
    // transcription. The first recording seen for a SID wins.
    const recordingBySid = new Map()
    data.recordings.forEach((recording) => {
      if (!recordingBySid.has(recording.recordingSid)) {
        recordingBySid.set(recording.recordingSid, recording)
      }
    })
    data.transcriptions.forEach((transcription) => {
      const match = recordingBySid.get(transcription.recordingSid)
      if (match) {
        transcription.recordingURL = match.recordingURL
      }
    })
    playlist = data.transcriptions
    len = playlist.length
    // With nothing to play there is no current entry for loadAudio to read.
    if (len === 0) {
      return
    }
    loadAudio()
  }).fail(function (jqXHR, textStatus, errorThrown) {
    // Surface request failures instead of silently doing nothing.
    console.log('Unable to load /list:', textStatus, errorThrown)
  })
}
//I am loading and transcribing the audio via google cloud on the fly from the front end. In reality, you'd be doing this on the backend and having your server store those transcriptions somewhere so they're not being transcribed every time the app is accessed by a front end user | |
// Starts loading the current playlist entry: requests a Google transcription
// for it and (re)loads the audio element, with playAudio registered to run on
// 'canplaythrough'. Does nothing when there is no entry at playlist[current].
loadAudio = function () {
  const entry = playlist[current]
  if (!entry) {
    return //empty playlist or index out of range: nothing to load
  }
  const curSID = entry.recordingSid
  const audioSource = '/recordings/' + curSID
  // transcribeAudio takes (audioSource, curSID). Passing only the SID sent it
  // as `source` and left `sid` undefined in the /transcribe request.
  transcribeAudio(audioSource, curSID)
  // NOTE(review): audioSource is not assigned to the audio element here, so
  // load() reloads whatever source the element already has — confirm whether
  // `$audio[0].src = audioSource` was intended.
  $audio[0].addEventListener('canplaythrough', playAudio)
  $audio[0].load()
}
// Asks the backend (/transcribe) for the Google transcription of a recording
// and logs the result.
//   audioSource - sent as the `source` query parameter
//   curSID      - sent as the `sid` query parameter
transcribeAudio = function (audioSource, curSID) {
  var params = { source: audioSource, sid: curSID }
  $.get('/transcribe', params)
    .done((data) => {
      console.log(data) //log the google speech api transcription
    })
    .fail((jqXHR, textStatus, errorThrown) => {
      // Report failed requests instead of ignoring them silently.
      console.log('Transcription request failed:', textStatus, errorThrown)
    })
}
// Logs the twilio transcription of the current playlist entry, when the entry
// exists and carries a transcription (neither null nor undefined).
playAudio = function () {
  const entry = playlist[current]
  if (!entry || entry.transcriptionText == null) {
    return
  }
  console.log(entry.transcriptionText) //log the twilio speech api transcription
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment