|
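// Voice assistant: waits for the wake word "Jarvis" (Porcupine), records
// until the speaker falls silent (node-vad), transcribes the audio with
// Whisper, asks a chat model for a reply, and speaks it back through a
// TTS endpoint piped into mpg123. Progress is rendered with Listr.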
import Listr from "listr";
import { Porcupine, BuiltinKeyword } from "@picovoice/porcupine-node";
import { PvRecorder } from "@picovoice/pvrecorder-node";
import VAD from "node-vad";
import { Configuration, OpenAIApi } from "openai";
import wavConverter from "wav-converter";
import { Readable, pipeline } from "node:stream";
import { spawn } from "node:child_process";
import { promisify } from "node:util";
import * as dotenv from "dotenv";

const pipelineAsync = promisify(pipeline);
|
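// Expected in .env: OPENAI (OpenAI API key), PORCUPINE (Picovoice access
// key), SYSTEM_MESSAGE (system prompt), MODEL (chat model name), and
// VOICE_URI / VOICE_KEY (TTS endpoint and its key; the "xi-api-key"
// header below follows the ElevenLabs convention).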
dotenv.config();
const env = process.env;

const openai = new OpenAIApi(new Configuration({ apiKey: env.OPENAI }));
// Listen for the built-in "Jarvis" keyword at high (0.95) sensitivity.
const porcupine = new Porcupine(env.PORCUPINE, [BuiltinKeyword.JARVIS], [0.95]);
const vad = new VAD(VAD.Mode.NORMAL);
// -1 selects the default capture device; frames are sized for Porcupine.
const recorder = new PvRecorder(-1, porcupine.frameLength);

const makeMessage = (role, content) => ({ role, content });
// Conversation history, seeded with the system prompt.
let history = [makeMessage("system", env.SYSTEM_MESSAGE)];
|
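// Subtasks that run, in this order, after each wake-word detection.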
const subtasks = {
  recordingTask: {
    title: "Recording",
    task: async (ctx, task) => {
      let audioBuffer = Buffer.from([]);
      // At 16-bit mono and Porcupine's 16 kHz sample rate, 75,000 bytes is
      // roughly 2.3 seconds of audio; collect at least that much, then keep
      // recording until the VAD reports silence.
      const minBytes = 50000 * 1.5;
      while (true) {
        const audioFrame = await recorder.read();
        audioBuffer = Buffer.concat([audioBuffer, Buffer.from(audioFrame.buffer)]);
        if (audioBuffer.length < minBytes) continue;
        const res = await vad.processAudio(audioBuffer, porcupine.sampleRate);
        if (res === VAD.Event.SILENCE) break;
      }
      ctx.audio = audioBuffer;
      recorder.stop();
      task.title = `Captured ${audioBuffer.length} bytes of audio.`;
    },
  },
|
  transcriptionTask: {
    title: "Transcribing",
    task: async (ctx, task) => {
      // Wrap the raw PCM in a WAV header; the header must declare the real
      // capture rate, since wav-converter defaults to 8 kHz.
      const wav = wavConverter.encodeWav(ctx.audio, {
        numChannels: 1,
        sampleRate: porcupine.sampleRate,
        byteRate: 16,
      });
      const audioReadStream = Readable.from(wav);
      // The SDK reads the upload's file name (and thus format) from `path`.
      audioReadStream.path = "conversation.wav";
      const {
        data: { text },
      } = await openai.createTranscription(audioReadStream, "whisper-1");
      ctx.transcript = text;
      task.title = text;
    },
  },
|
  inferenceTask: {
    title: "Inferring",
    task: async (ctx, task) => {
      const newHistory = [...history, makeMessage("user", ctx.transcript)];
      const request = { messages: newHistory, model: env.MODEL };
      const { data } = await openai.createChatCompletion(request);
      ctx.response = data.choices[0].message;
      // Keep both sides of the exchange so later turns have context.
      history = [...newHistory, ctx.response];
      task.title = ctx.response.content;
    },
  },
|
  speechTask: {
    title: "Speaking",
    task: async (ctx, task) => {
      const response = await fetch(env.VOICE_URI, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "xi-api-key": env.VOICE_KEY,
        },
        body: JSON.stringify({ text: ctx.response.content }),
      });
      // Pipe the returned audio straight into mpg123 and wait for playback
      // to finish; the close listener is attached before piping starts so
      // the exit can't be missed.
      const mpg123 = spawn("mpg123", ["-"]);
      const playbackDone = new Promise((resolve) => mpg123.on("close", resolve));
      await pipelineAsync(Readable.fromWeb(response.body), mpg123.stdin);
      await playbackDone;
      task.title = "Finished speaking";
    },
  },
};
|
const wakeWordTask = {
  title: "Listening",
  task: async (_ctx, task) => {
    recorder.start();
    // porcupine.process() returns the detected keyword's index, or -1 when
    // the frame contains no keyword.
    while (porcupine.process(await recorder.read()) === -1) {}
    task.title = "'Jarvis' detected.";
    return new Listr(Object.values(subtasks));
  },
};
|
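// Note: top-level await requires running this file as an ES module
// ("type": "module" in package.json), and the global fetch used above
// needs Node 18 or later.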
while (true) {
  await new Listr([wakeWordTask], { collapse: false }).run();
}