Skip to content

Instantly share code, notes, and snippets.

@he9lin
Created August 15, 2024 16:27
Show Gist options
  • Save he9lin/45257786e922b8208088f15133983956 to your computer and use it in GitHub Desktop.
Save he9lin/45257786e922b8208088f15133983956 to your computer and use it in GitHub Desktop.
require('dotenv').config();
require('colors');
const express = require('express');
const ExpressWs = require('express-ws');
const { GptService } = require('./services/gpt-service');
const { StreamService } = require('./services/stream-service');
const { TranscriptionService } = require('./services/transcription-service');
const { TextToSpeechService } = require('./services/tts-service');
const { recordingService } = require('./services/recording-service');
const VoiceResponse = require('twilio').twiml.VoiceResponse;
// Port the HTTP server binds to; falls back to 3000 when PORT is unset or empty.
const PORT = process.env.PORT || 3000;

// Create the Express application, then let express-ws patch it so that
// WebSocket routes can be registered on it alongside regular HTTP routes.
const app = express();
ExpressWs(app);
/**
 * POST /incoming — voice webhook.
 *
 * Responds with TwiML containing a <Connect><Stream> verb that points at this
 * server's `/connection` WebSocket endpoint. The host name is read from the
 * SERVER environment variable.
 *
 * If building or sending the TwiML fails, the error is logged and a 500 is
 * returned so the caller is not left waiting on a request that never completes.
 */
app.post('/incoming', (req, res) => {
  try {
    const response = new VoiceResponse();
    const connect = response.connect();
    connect.stream({ url: `wss://${process.env.SERVER}/connection` });

    res.type('text/xml');
    res.end(response.toString());
  } catch (err) {
    console.log(err);
    // Always finish the request; only set a status if nothing was sent yet.
    if (!res.headersSent) {
      res.status(500).end();
    } else {
      res.end();
    }
  }
});
/**
 * WebSocket /connection — media stream endpoint (one socket per call).
 *
 * Wires together the per-call pipeline:
 *   incoming 'media' frames -> TranscriptionService
 *   'transcription' text    -> GptService.completion
 *   'gptreply'              -> TextToSpeechService.generate
 *   'speech' audio          -> StreamService.buffer (sent back over `ws`)
 *
 * `marks` holds the labels reported by StreamService ('audiosent') that have
 * not yet been acknowledged by a 'mark' message. While it is non-empty, an
 * 'utterance' longer than 5 characters is treated as an interruption and a
 * 'clear' event is sent on the socket.
 */
app.ws('/connection', (ws) => {
  try {
    ws.on('error', console.error);

    // Filled in from start message
    let streamSid;
    let callSid;

    // TODO: given the phone number, configure GPT per business (business name,
    // prompt, greetings), e.g. new GptService(name, prompt, greetings).
    // These can be hard-coded for now.
    const transcriptionService = new TranscriptionService();
    const gptService = new GptService();
    const ttsService = new TextToSpeechService({});
    const streamService = new StreamService(ws);

    let marks = [];
    let interactionCount = 0;

    // Incoming from MediaStream
    ws.on('message', function message(data) {
      // This callback runs outside the outer try/catch, so guard it here:
      // a malformed or incomplete frame must not surface as an uncaught
      // exception from the event emitter.
      try {
        const msg = JSON.parse(data);

        if (msg.event === 'start') {
          streamSid = msg.start.streamSid;
          callSid = msg.start.callSid;

          streamService.setStreamSid(streamSid); // Twilio knows who sent back the audio
          gptService.setCallSid(callSid); // Knows where to transfer the call

          // TODO: given the phone number, update more GPT context
          // (business name, prompt, greetings); can be hard-coded.

          // Set RECORDING_ENABLED='true' in .env to record calls
          recordingService(ttsService, callSid)
            .then(() => {
              console.log(`Twilio -> Starting Media Stream for ${streamSid}`.underline.red);
              ttsService.generate({partialResponseIndex: null, partialResponse: 'Hello! I understand you\'re looking for a pair of AirPods, is that correct?'}, 0);
            })
            .catch((err) => {
              // Do not leave the rejection unhandled; the greeting is skipped.
              console.error(`Twilio -> Failed to start Media Stream for ${streamSid}`, err);
            });
        } else if (msg.event === 'media') {
          transcriptionService.send(msg.media.payload);
        } else if (msg.event === 'mark') {
          const label = msg.mark.name;
          console.log(`Twilio -> Audio completed mark (${msg.sequenceNumber}): ${label}`.red);
          // This label has been acknowledged; drop it from the pending list.
          marks = marks.filter((m) => m !== label);
        } else if (msg.event === 'stop') {
          console.log(`Twilio -> Media stream ${streamSid} ended.`.underline.red);
        }
      } catch (err) {
        console.error('Twilio -> Failed to handle media stream message', err);
      }
    });

    transcriptionService.on('utterance', async (text) => {
      // This is a bit of a hack to filter out empty utterances
      if (marks.length > 0 && text?.length > 5) {
        console.log('Twilio -> Interruption, Clearing stream'.red);
        ws.send(
          JSON.stringify({
            streamSid,
            event: 'clear',
          })
        );
      }
    });

    transcriptionService.on('transcription', async (text) => {
      if (!text) { return; }
      console.log(`Interaction ${interactionCount} – STT -> GPT: ${text}`.yellow);
      gptService.completion(text, interactionCount);
      interactionCount += 1;
    });

    gptService.on('gptreply', async (gptReply, icount) => {
      console.log(`Interaction ${icount}: GPT -> TTS: ${gptReply.partialResponse}`.green );
      ttsService.generate(gptReply, icount);
    });

    ttsService.on('speech', (responseIndex, audio, label, icount) => {
      console.log(`Interaction ${icount}: TTS -> TWILIO: ${label}`.blue);
      streamService.buffer(responseIndex, audio);
    });

    // Track audio that has been sent but not yet acknowledged by a 'mark'.
    streamService.on('audiosent', (markLabel) => {
      marks.push(markLabel);
    });
  } catch (err) {
    console.log(err);
  }
});
// Log from the listen callback so the message is printed only once the server
// is actually listening, not when binding the port failed.
app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment