Skip to content

Instantly share code, notes, and snippets.

@roshanadh
Last active May 8, 2025 23:39
Show Gist options
  • Select an option

  • Save roshanadh/fae20eb8561e972a9373723d499600f0 to your computer and use it in GitHub Desktop.

Select an option

Save roshanadh/fae20eb8561e972a9373723d499600f0 to your computer and use it in GitHub Desktop.
Realtime API with TypeScript and Azure OpenAI
import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket';
import { AzureOpenAI } from "openai";
import Speaker from "speaker";
import "dotenv/config";
// configure the speaker instance -- this will pipe the audio to the speakers in realtime
// Audio sink: raw PCM chunks written to this stream play through the
// system speakers as they arrive. The format matches what the realtime
// API streams back: mono, 16-bit samples, 24 kHz.
const audioFormat = {
  channels: 1, // mono
  bitDepth: 16, // 16-bit signed PCM
  sampleRate: 24000, // 24 kHz
};
const speaker = new Speaker(audioFormat);
/**
 * Connects to an Azure OpenAI realtime deployment over WebSocket, sends a
 * single user message, and streams the model's response back — text deltas
 * to stdout and audio deltas piped to the speakers as they arrive.
 *
 * Required environment variables (configure in your .env file):
 *   AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY,
 *   AZURE_OPENAI_DEPLOYMENT_NAME, OPENAI_API_VERSION
 *
 * @returns resolves once the handlers are registered; the conversation
 *          itself runs on the WebSocket event loop.
 * @throws  Error if any required environment variable is missing.
 */
async function main(): Promise<void> {
  const endpoint = process.env.AZURE_OPENAI_ENDPOINT;
  const apiKey = process.env.AZURE_OPENAI_API_KEY;
  const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME;
  const apiVersion = process.env.OPENAI_API_VERSION;
  // Fail fast with a clear message instead of an opaque connect/auth error.
  if (!endpoint || !apiKey || !deploymentName || !apiVersion) {
    throw new Error(
      "Missing required environment variable(s): set AZURE_OPENAI_ENDPOINT, " +
        "AZURE_OPENAI_API_KEY, AZURE_OPENAI_DEPLOYMENT_NAME, and OPENAI_API_VERSION"
    );
  }
  // Create the Azure OpenAI client and upgrade it to a realtime WebSocket.
  const azureOpenAIClient = new AzureOpenAI({
    apiKey: apiKey,
    apiVersion: apiVersion,
    deployment: deploymentName,
    endpoint: endpoint,
  });
  const realtimeClient = await OpenAIRealtimeWebSocket.azure(azureOpenAIClient);
  realtimeClient.socket.addEventListener("open", () => {
    console.log("Connection opened!");
    // Once the connection opens, configure the session, seed the
    // conversation with a user message, and ask the model to respond.
    realtimeClient.send({
      type: "session.update",
      session: {
        modalities: ["text", "audio"],
        model: "gpt-4o-mini-realtime-preview",
      },
    });
    realtimeClient.send({
      type: "conversation.item.create",
      item: {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "Please assist the user with their query." }],
      },
    });
    // Prompt the model to respond to the user query.
    realtimeClient.send({ type: "response.create" });
  });
  realtimeClient.on("error", (err) => {
    // NOTE: a `throw` here would escape main()'s promise chain (the handler
    // runs on the socket's event loop) and crash as an uncaught exception,
    // bypassing the caller's .catch. Log and shut down cleanly instead.
    console.error("Realtime connection error:", err);
    realtimeClient.close();
  });
  realtimeClient.on("session.created", (event) => {
    console.log("session created!", event.session);
    console.log();
  });
  realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
  // Whenever there's an incoming chunk of audio, pipe it to the speaker.
  realtimeClient.on("response.audio.delta", (event) => {
    const buffer = Buffer.from(event.delta, "base64");
    console.log(`Received ${buffer.length} bytes of audio data.`);
    speaker.write(buffer);
  });
  realtimeClient.on("response.audio_transcript.delta", (event) => {
    console.log(`Received text delta:${event.delta}.`);
  });
  realtimeClient.on("response.text.done", () => console.log());
  // When the model finishes its response, flush the audio and close the
  // WebSocket connection.
  realtimeClient.on("response.done", () => {
    speaker.end(); // flushes remaining audio
    realtimeClient.close();
  });
  realtimeClient.socket.addEventListener("close", (event) => console.log("\nConnection closed. Event = ", event));
}
// Entry point: run the sample and report any failure on stderr so the
// rejection never goes unhandled.
const reportSampleError = (err: unknown): void => {
  console.error("The sample encountered an error:", err);
};
main().catch(reportSampleError);
export { main };
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment