Created
August 15, 2024 16:27
-
-
Save he9lin/45257786e922b8208088f15133983956 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load variables from .env into process.env before anything below reads them.
require('dotenv').config();
// Imported for its side effect: log calls in this file chain color helpers
// such as `.red` and `.underline` directly on strings.
require('colors');

const express = require('express');
const ExpressWs = require('express-ws');

const { GptService } = require('./services/gpt-service');
const { StreamService } = require('./services/stream-service');
const { TranscriptionService } = require('./services/transcription-service');
const { TextToSpeechService } = require('./services/tts-service');
const { recordingService } = require('./services/recording-service');

const VoiceResponse = require('twilio').twiml.VoiceResponse;

const app = express();
// Enables `app.ws(...)` so WebSocket routes can be declared on the Express app.
ExpressWs(app);

// Port to listen on; falls back to 3000 when PORT is not set.
const PORT = process.env.PORT || 3000;
/**
 * POST /incoming — replies with TwiML that connects the call to a media
 * stream on this server's `/connection` WebSocket route.
 *
 * The stream host is taken from the SERVER environment variable.
 * If building or sending the TwiML fails, the error is logged and the request
 * is answered with HTTP 500 instead of being left open.
 */
app.post('/incoming', (req, res) => {
  try {
    const response = new VoiceResponse();
    const connect = response.connect();
    connect.stream({ url: `wss://${process.env.SERVER}/connection` });

    res.type('text/xml');
    res.end(response.toString());
  } catch (err) {
    console.error(err);
    // Always finish the request: without this the caller waits until it times out.
    if (!res.headersSent) {
      res.status(500).end();
    }
  }
});
/**
 * WebSocket route `/connection` — handles one media-stream socket.
 *
 * Per connection it creates a transcription, GPT, text-to-speech and stream
 * service and wires them together:
 *   - `media` messages        -> transcriptionService.send(payload)
 *   - 'transcription' events  -> gptService.completion(text, interactionCount)
 *   - 'gptreply' events       -> ttsService.generate(reply, icount)
 *   - 'speech' events         -> streamService.buffer(responseIndex, audio)
 *   - 'audiosent' events      -> mark label remembered in `marks`
 *   - `mark` messages         -> label removed from `marks`
 *   - 'utterance' events      -> sends a `clear` event on the socket while
 *                                marks are still outstanding (interruption)
 *
 * Listeners run after this function has returned, so the outer try/catch only
 * covers setup; each listener that can fail guards itself.
 */
app.ws('/connection', (ws) => {
  try {
    ws.on('error', console.error);

    // Filled in from start message
    let streamSid;
    let callSid;

    // TODO: given the phone number, look up business name, prompt and greetings
    // (can be hard-coded) and pass them in, e.g. new GptService(name, prompt, greetings).
    const transcriptionService = new TranscriptionService();
    const gptService = new GptService();
    const ttsService = new TextToSpeechService({});
    const streamService = new StreamService(ws);

    // Labels reported by 'audiosent' that have not yet come back as a `mark` message.
    let marks = [];
    let interactionCount = 0;

    // Dispatches one parsed media-stream message by its `event` field.
    const handleMessage = (msg) => {
      if (msg.event === 'start') {
        streamSid = msg.start.streamSid;
        callSid = msg.start.callSid;

        streamService.setStreamSid(streamSid); // Twilio knows who sent back the audio
        gptService.setCallSid(callSid); // Knows where to transfer the call

        // TODO: given the phone number, update more GPT context
        // (business name, prompt, greetings) — can be hard-coded.

        // Set RECORDING_ENABLED='true' in .env to record calls
        recordingService(ttsService, callSid)
          .then(() => {
            console.log(`Twilio -> Starting Media Stream for ${streamSid}`.underline.red);
            ttsService.generate({partialResponseIndex: null, partialResponse: 'Hello! I understand you\'re looking for a pair of AirPods, is that correct?'}, 0);
          })
          .catch((err) => {
            // Without a handler a failure here becomes an unhandled rejection.
            console.error(`Twilio -> Failed to start media stream ${streamSid}`, err);
          });
      } else if (msg.event === 'media') {
        transcriptionService.send(msg.media.payload);
      } else if (msg.event === 'mark') {
        const label = msg.mark.name;
        console.log(`Twilio -> Audio completed mark (${msg.sequenceNumber}): ${label}`.red);
        marks = marks.filter((m) => m !== label);
      } else if (msg.event === 'stop') {
        console.log(`Twilio -> Media stream ${streamSid} ended.`.underline.red);
      }
    };

    // Incoming from MediaStream
    ws.on('message', function message(data) {
      try {
        handleMessage(JSON.parse(data));
      } catch (err) {
        // A non-JSON or malformed frame must not throw out of the listener.
        console.error('Twilio -> Failed to handle media stream message', err);
      }
    });

    transcriptionService.on('utterance', async (text) => {
      // This is a bit of a hack to filter out empty utterances
      if (marks.length > 0 && text?.length > 5) {
        console.log('Twilio -> Interruption, Clearing stream'.red);
        ws.send(
          JSON.stringify({
            streamSid,
            event: 'clear',
          })
        );
      }
    });

    transcriptionService.on('transcription', async (text) => {
      if (!text) { return; }
      console.log(`Interaction ${interactionCount} – STT -> GPT: ${text}`.yellow);

      // Claim the index before awaiting so overlapping transcriptions never share one.
      const currentInteraction = interactionCount;
      interactionCount += 1;

      try {
        // NOTE(review): completion is assumed to possibly return a promise; awaiting a plain value is harmless.
        await gptService.completion(text, currentInteraction);
      } catch (err) {
        console.error(`Interaction ${currentInteraction}: GPT completion failed`, err);
      }
    });

    gptService.on('gptreply', async (gptReply, icount) => {
      console.log(`Interaction ${icount}: GPT -> TTS: ${gptReply.partialResponse}`.green );
      ttsService.generate(gptReply, icount);
    });

    ttsService.on('speech', (responseIndex, audio, label, icount) => {
      console.log(`Interaction ${icount}: TTS -> TWILIO: ${label}`.blue);
      streamService.buffer(responseIndex, audio);
    });

    streamService.on('audiosent', (markLabel) => {
      marks.push(markLabel);
    });
  } catch (err) {
    console.log(err);
  }
});
// Start the HTTP/WebSocket server; the message is printed from the listen
// callback so it only appears once the server is actually accepting connections.
app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment