|
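// Voice assistant: waits for the wake word "Jarvis" (Porcupine), records
// until the speaker falls silent (node-vad), transcribes the audio with
// Whisper, asks a chat model for a reply, and speaks it back through a
// TTS endpoint piped into mpg123. Progress is rendered with Listr.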
import Listr from "listr";
import { Porcupine, BuiltinKeyword } from "@picovoice/porcupine-node";
import { PvRecorder } from "@picovoice/pvrecorder-node";
import VAD from "node-vad";
import { Configuration, OpenAIApi } from "openai";
import wavConverter from "wav-converter";
import { Readable, pipeline } from "node:stream";
import { spawn } from "node:child_process";
import { promisify } from "node:util";
import * as dotenv from "dotenv";

const pipelineAsync = promisify(pipeline);
|
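// Expected in .env: OPENAI (OpenAI API key), PORCUPINE (Picovoice access
// key), SYSTEM_MESSAGE (system prompt), MODEL (chat model name), and
// VOICE_URI / VOICE_KEY (TTS endpoint and its key; the "xi-api-key"
// header below follows the ElevenLabs convention).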
dotenv.config();
const env = process.env;

const openai = new OpenAIApi(new Configuration({ apiKey: env.OPENAI }));
// Listen for the built-in "Jarvis" keyword at high (0.95) sensitivity.
const porcupine = new Porcupine(env.PORCUPINE, [BuiltinKeyword.JARVIS], [0.95]);
const vad = new VAD(VAD.Mode.NORMAL);
// -1 selects the default capture device; frames are sized for Porcupine.
const recorder = new PvRecorder(-1, porcupine.frameLength);

const makeMessage = (role, content) => ({ role, content });
// Conversation history, seeded with the system prompt.
let history = [makeMessage("system", env.SYSTEM_MESSAGE)];
|
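// Subtasks that run, in this order, after each wake-word detection.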
const subtasks = {
  recordingTask: {
    title: "Recording",
    task: async (ctx, task) => {
      let audioBuffer = Buffer.from([]);
      // At 16-bit mono and Porcupine's 16 kHz sample rate, 75,000 bytes is
      // roughly 2.3 seconds of audio; collect at least that much, then keep
      // recording until the VAD reports silence.
      const minBytes = 50000 * 1.5;
      while (true) {
        const audioFrame = await recorder.read();
        audioBuffer = Buffer.concat([audioBuffer, Buffer.from(audioFrame.buffer)]);
        if (audioBuffer.length < minBytes) continue;
        const res = await vad.processAudio(audioBuffer, porcupine.sampleRate);
        if (res === VAD.Event.SILENCE) break;
      }
      ctx.audio = audioBuffer;
      recorder.stop();
      task.title = `Captured ${audioBuffer.length} bytes of audio.`;
    },
  },
|
  transcriptionTask: {
    title: "Transcribing",
    task: async (ctx, task) => {
      // Wrap the raw PCM in a WAV header; the header must declare the real
      // capture rate, since wav-converter defaults to 8 kHz.
      const wav = wavConverter.encodeWav(ctx.audio, {
        numChannels: 1,
        sampleRate: porcupine.sampleRate,
        byteRate: 16,
      });
      const audioReadStream = Readable.from(wav);
      // The SDK reads the upload's file name (and thus format) from `path`.
      audioReadStream.path = "conversation.wav";
      const {
        data: { text },
      } = await openai.createTranscription(audioReadStream, "whisper-1");
      ctx.transcript = text;
      task.title = text;
    },
  },
|
  inferenceTask: {
    title: "Inferring",
    task: async (ctx, task) => {
      const newHistory = [...history, makeMessage("user", ctx.transcript)];
      const request = { messages: newHistory, model: env.MODEL };
      const { data } = await openai.createChatCompletion(request);
      ctx.response = data.choices[0].message;
      // Keep both sides of the exchange so later turns have context.
      history = [...newHistory, ctx.response];
      task.title = ctx.response.content;
    },
  },
|
  speechTask: {
    title: "Speaking",
    task: async (ctx, task) => {
      const response = await fetch(env.VOICE_URI, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "xi-api-key": env.VOICE_KEY,
        },
        body: JSON.stringify({ text: ctx.response.content }),
      });
      // Pipe the returned audio straight into mpg123 and wait for playback
      // to finish; the close listener is attached before piping starts so
      // the exit can't be missed.
      const mpg123 = spawn("mpg123", ["-"]);
      const playbackDone = new Promise((resolve) => mpg123.on("close", resolve));
      await pipelineAsync(Readable.fromWeb(response.body), mpg123.stdin);
      await playbackDone;
      task.title = "Finished speaking";
    },
  },
};
|
const wakeWordTask = {
  title: "Listening",
  task: async (_ctx, task) => {
    recorder.start();
    // porcupine.process() returns the detected keyword's index, or -1 when
    // the frame contains no keyword.
    while (porcupine.process(await recorder.read()) === -1) {}
    task.title = "'Jarvis' detected.";
    return new Listr(Object.values(subtasks));
  },
};
|
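// Note: top-level await requires running this file as an ES module
// ("type": "module" in package.json), and the global fetch used above
// needs Node 18 or later.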
while (true) {
  await new Listr([wakeWordTask], { collapse: false }).run();
}