Skip to content

Instantly share code, notes, and snippets.

@heaversm
Last active January 14, 2023 11:04
Show Gist options
  • Save heaversm/447e0ec7900cb235ee7ede588818b6cb to your computer and use it in GitHub Desktop.
Twilio Call Recording Transcriptions With Google Web Speech API
//node vars
const express = require('express')
const twilio = require('twilio')
const request = require('request') //HTTP client module; NOTE: shadowed by `(request, response)` handler params below
//twilio vars
const accountSid = '###' //your twilio account SID
const authToken = "###" //your twilio auth token
const client = require('twilio')(accountSid, authToken) //authenticated REST client for listing recordings/transcriptions
const baseURL = 'https://api.twilio.com/2010-04-01/Accounts/[YOUR_ACCOUNT_HERE]/Recordings/' //replace [YOUR_ACCOUNT_HERE] with your account SID; recording SIDs are appended to fetch .wav media
//cloud speech vars
const Speech = require('@google-cloud/speech')
const projectId = 'twilio-translated' //your Google Cloud project ID
//NOTE(review): calling Speech({...}) as a factory is the pre-v1 @google-cloud/speech
//API — newer releases use `new Speech.SpeechClient({...})`; confirm installed version
const speechClient = Speech({
projectId: projectId
})
//recognition options shared by transcription requests
const speech_options = {
encoding: 'LINEAR16', //Twilio .wav recordings are 16-bit linear PCM
sampleRateHertz: 8000, //telephone-quality sample rate
languageCode: 'en-US',
verbose: true, //request rich result objects rather than bare strings
}
let app = express()
//recording the call
//Webhook for incoming calls: replies with TwiML that greets the caller,
//records up to 30s of audio, then hangs up. Point your Twilio number's
//voice webhook at this route (e.g. https://yourserver.com/record).
//FIX: handler params renamed (request, response) -> (req, res) so they no
//longer shadow the `request` module required at the top of the file, and to
//match the /transcribe handler's convention.
app.post('/record', (req, res) => {
  // Use the Twilio Node.js SDK to build an XML (TwiML) response
  let twiml = new twilio.TwimlResponse()
  twiml.say('Hello. Please leave your message. Press any button to end recording.') //spoken robot prompt
  // Use <Record> to record and transcribe the caller's message
  twiml.record({
    transcribe: true, //kept so Twilio transcriptions can be compared to Google ones; omit or set false in your app
    maxLength: 30, //seconds to record
  })
  // End the call with <Hangup>
  twiml.hangup()
  // Render the response as XML in reply to the webhook request
  res.type('text/xml')
  res.send(twiml.toString())
})
//Return all recordings plus all non-failed Twilio transcriptions as one JSON
//payload. The two lists live behind separate Twilio API endpoints, so both
//requests run in parallel and the combined payload is sent once BOTH have
//completed.
//FIX: the original only responded when the *other* list happened to be
//non-empty, so an empty recordings or transcriptions list left the HTTP
//request hanging forever; it also ignored `err` and would throw on
//`data.forEach` when the API call failed. A pending-count gate fixes both.
app.get('/list', (request, response) => {
  let pending = 2 //one for recordings, one for transcriptions
  let hasSentData = false
  const responseData = {
    recordings: [],
    transcriptions: [],
  }
  //send the combined payload exactly once, after both API callbacks fire
  const sendIfDone = () => {
    pending--
    if (pending === 0 && !hasSentData) {
      hasSentData = true
      response.send(responseData)
    }
  }
  client.recordings.list(function (err, data) {
    if (err) {
      console.log(err) //log and still respond with whatever we have
    } else {
      data.forEach(function (recording) {
        responseData.recordings.push({
          recordingSid: recording.sid,
        })
      })
    }
    sendIfDone()
  })
  client.transcriptions.list(function (err, data) {
    if (err) {
      console.log(err)
    } else {
      data.forEach(function (transcription) {
        if (transcription.status != 'failed') { //only keep transcriptions that actually succeeded
          responseData.transcriptions.push({
            transcriptionText: transcription.transcriptionText,
            recordingSid: transcription.recordingSid,
          })
        }
      })
    }
    sendIfDone()
  })
})
//Transcribe a Twilio recording (looked up by SID from the query string) with
//the Google Cloud Speech API and return the transcript text.
//FIX: the original closed this handler with a bare `}` instead of `})`, so
//the app.get(...) call was never closed — a syntax error that broke the file.
//FIX: the error branch now responds with a 500 instead of leaving the client
//request hanging after logging.
app.get('/transcribe', (req, res) => {
  //local copy of the recognition options (mirrors the module-level speech_options)
  const speech_options = {
    encoding: 'LINEAR16', //Twilio .wav recordings are 16-bit linear PCM
    sampleRateHertz: 8000, //telephone-quality sample rate
    languageCode: 'en-US',
    verbose: true, //request rich result objects (with .transcript) rather than bare strings
  }
  const sid = req.query.sid
  const directURL = baseURL + sid + '.wav' //public media URL for this recording
  const transcribeCallback = (err, transcript, apiResponse) => {
    if (err) {
      console.log(err)
      res.status(500).send('transcription failed')
    } else {
      //verbose results arrive as objects; join all chunks into one string
      const transcriptText = transcript.map((item) => {
        return item.transcript
      }).join(" ")
      res.send(transcriptText)
    }
  }
  speechClient.recognize(directURL, speech_options, transcribeCallback)
})
//serve the static front end from public_html; your application might differ
app.use(express.static('public_html')) //I am having express serve my static front end app, your application might be different
//NOTE: [YOUR_PORT] is a fill-in placeholder — replace it with a real port
//number (e.g. 3000) before running; as written it is a ReferenceError.
app.listen([YOUR_PORT], function () { //run the server and listen for all incoming requests
console.log('listening')
})
//Fetch the combined recordings/transcriptions payload from /list, attach each
//recording's URL to its matching transcription, then kick off playback.
loadFiles = function () {
  $.ajax({
    method: "GET",
    url: "/list"
  }).done(function (data) {
    //join transcriptions to recordings by SID
    //(this association should probably be done on the backend, really)
    data.transcriptions.forEach(function (transcription) {
      var match = data.recordings.find(function (recording) {
        return recording.recordingSid === transcription.recordingSid
      })
      if (match) {
        transcription.recordingURL = match.recordingURL
      }
    })
    playlist = data.transcriptions
    len = playlist.length
    loadAudio()
  })
}
//I am loading and transcribing the audio via google cloud on the fly from the front end. In reality, you'd be doing this on the backend and having your server store those transcriptions somewhere so they're not being transcribed every time the app is accessed by a front end user
//Load the current playlist entry: request its Google Cloud transcription from
//the backend and start playback once the recording can play through.
//(In a real app you'd transcribe once server-side and cache the results,
//rather than re-transcribing on every front-end load.)
loadAudio = function () {
  const curSID = playlist[current].recordingSid
  const audioSource = '/recordings/' + playlist[current].recordingSid
  //FIX: the original called transcribeAudio(curSID) with one argument, which
  //bound the SID to the `audioSource` parameter and left `sid` undefined on
  //the /transcribe request
  transcribeAudio(audioSource, curSID)
  //FIX: the original computed audioSource but never assigned it to the
  //element, so load() would just reload whatever src was already set
  $audio[0].src = audioSource
  $audio[0].addEventListener('canplaythrough', playAudio)
  $audio[0].load()
}
//Ask the backend /transcribe route to run the given recording through the
//Google Cloud Speech API; `sid` is what the server keys off of.
transcribeAudio = function (audioSource, curSID) {
  $.get('/transcribe', { source: audioSource, sid: curSID }).done(function (data) {
    console.log(data) //log the google speech api transcription
  })
}
//canplaythrough handler: once the current track is ready, surface its
//Twilio-provided transcription (if any) on the console.
playAudio = function () {
  var track = playlist[current]
  if (track && track.transcriptionText != null) {
    console.log(track.transcriptionText) //log the twilio speech api transcription
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment