Skip to content

Instantly share code, notes, and snippets.

@roshanadh
Last active May 8, 2025 23:39
Show Gist options
  • Select an option

  • Save roshanadh/fae20eb8561e972a9373723d499600f0 to your computer and use it in GitHub Desktop.

Select an option

Save roshanadh/fae20eb8561e972a9373723d499600f0 to your computer and use it in GitHub Desktop.
Realtime API with TypeScript and Azure OpenAI
import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket';
import { AzureOpenAI } from "openai";
import Speaker from "speaker";
import "dotenv/config";
// configure the speaker instance -- this will pipe the audio to the speakers in realtime
// Audio sink: raw PCM chunks written to this stream play through the
// system speakers as they arrive. The format matches what the realtime
// API streams back: mono, 16-bit samples, 24 kHz.
const audioFormat = {
  channels: 1, // mono
  bitDepth: 16, // 16-bit signed PCM
  sampleRate: 24000, // 24 kHz
};
const speaker = new Speaker(audioFormat);
/**
 * Connects to an Azure OpenAI realtime deployment over WebSocket, sends a
 * single user message, and streams the model's response back — text deltas
 * to stdout and audio deltas piped to the speakers as they arrive.
 *
 * Required environment variables (configure in your .env file):
 *   AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY,
 *   AZURE_OPENAI_DEPLOYMENT_NAME, OPENAI_API_VERSION
 *
 * @returns resolves once the handlers are registered; the conversation
 *          itself runs on the WebSocket event loop.
 * @throws  Error if any required environment variable is missing.
 */
async function main(): Promise<void> {
  const endpoint = process.env.AZURE_OPENAI_ENDPOINT;
  const apiKey = process.env.AZURE_OPENAI_API_KEY;
  const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME;
  const apiVersion = process.env.OPENAI_API_VERSION;
  // Fail fast with a clear message instead of an opaque connect/auth error.
  if (!endpoint || !apiKey || !deploymentName || !apiVersion) {
    throw new Error(
      "Missing required environment variable(s): set AZURE_OPENAI_ENDPOINT, " +
        "AZURE_OPENAI_API_KEY, AZURE_OPENAI_DEPLOYMENT_NAME, and OPENAI_API_VERSION"
    );
  }
  // Create the Azure OpenAI client and upgrade it to a realtime WebSocket.
  const azureOpenAIClient = new AzureOpenAI({
    apiKey: apiKey,
    apiVersion: apiVersion,
    deployment: deploymentName,
    endpoint: endpoint,
  });
  const realtimeClient = await OpenAIRealtimeWebSocket.azure(azureOpenAIClient);
  realtimeClient.socket.addEventListener("open", () => {
    console.log("Connection opened!");
    // Once the connection opens, configure the session, seed the
    // conversation with a user message, and ask the model to respond.
    realtimeClient.send({
      type: "session.update",
      session: {
        modalities: ["text", "audio"],
        model: "gpt-4o-mini-realtime-preview",
      },
    });
    realtimeClient.send({
      type: "conversation.item.create",
      item: {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: "Please assist the user with their query." }],
      },
    });
    // Prompt the model to respond to the user query.
    realtimeClient.send({ type: "response.create" });
  });
  realtimeClient.on("error", (err) => {
    // NOTE: a `throw` here would escape main()'s promise chain (the handler
    // runs on the socket's event loop) and crash as an uncaught exception,
    // bypassing the caller's .catch. Log and shut down cleanly instead.
    console.error("Realtime connection error:", err);
    realtimeClient.close();
  });
  realtimeClient.on("session.created", (event) => {
    console.log("session created!", event.session);
    console.log();
  });
  realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
  // Whenever there's an incoming chunk of audio, pipe it to the speaker.
  realtimeClient.on("response.audio.delta", (event) => {
    const buffer = Buffer.from(event.delta, "base64");
    console.log(`Received ${buffer.length} bytes of audio data.`);
    speaker.write(buffer);
  });
  realtimeClient.on("response.audio_transcript.delta", (event) => {
    console.log(`Received text delta:${event.delta}.`);
  });
  realtimeClient.on("response.text.done", () => console.log());
  // When the model finishes its response, flush the audio and close the
  // WebSocket connection.
  realtimeClient.on("response.done", () => {
    speaker.end(); // flushes remaining audio
    realtimeClient.close();
  });
  realtimeClient.socket.addEventListener("close", (event) => console.log("\nConnection closed. Event = ", event));
}
// Entry point: run the sample and report any failure on stderr so the
// rejection never goes unhandled.
const reportSampleError = (err: unknown): void => {
  console.error("The sample encountered an error:", err);
};
main().catch(reportSampleError);
export { main };
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment