Last active
May 8, 2025 23:39
-
-
Save roshanadh/fae20eb8561e972a9373723d499600f0 to your computer and use it in GitHub Desktop.
Realtime API with TypeScript and Azure OpenAI
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; | |
| import { AzureOpenAI } from "openai"; | |
| import Speaker from "speaker"; | |
| import "dotenv/config"; | |
// Audio sink for the sample: chunks written to this instance are piped to the
// speakers in realtime. The format below is the one the audio chunks are
// written in (see the "response.audio.delta" handler).
const speaker = new Speaker({
  sampleRate: 24000, // 24 kHz
  bitDepth: 16, // 16-bit samples
  channels: 1, // mono
});
/**
 * Runs a single text-and-audio exchange against an Azure OpenAI realtime deployment.
 *
 * Opens a realtime WebSocket, sends one user message, streams text deltas to
 * stdout and audio deltas to the module-level `speaker`, and closes the
 * connection once the model reports that the response is done.
 *
 * Configuration is read from the environment (e.g. a `.env` file):
 * `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`,
 * `AZURE_OPENAI_DEPLOYMENT_NAME` and `OPENAI_API_VERSION`.
 *
 * @returns A promise that resolves once the WebSocket connection has closed.
 * @throws Error listing every required environment variable that is missing.
 *   The promise also rejects with any error reported by the realtime client.
 */
async function main(): Promise<void> {
  // configure these in your .env file
  const settings = {
    AZURE_OPENAI_ENDPOINT: process.env.AZURE_OPENAI_ENDPOINT,
    AZURE_OPENAI_API_KEY: process.env.AZURE_OPENAI_API_KEY,
    AZURE_OPENAI_DEPLOYMENT_NAME: process.env.AZURE_OPENAI_DEPLOYMENT_NAME,
    OPENAI_API_VERSION: process.env.OPENAI_API_VERSION,
  };

  // Fail fast with one actionable message instead of an error from deep inside the SDK.
  const missing = Object.entries(settings)
    .filter(([, value]) => !value)
    .map(([name]) => name);
  if (missing.length > 0) {
    throw new Error(`Missing required environment variables: ${missing.join(", ")}`);
  }

  // create the OpenAI client
  const azureOpenAIClient = new AzureOpenAI({
    apiKey: settings.AZURE_OPENAI_API_KEY,
    apiVersion: settings.OPENAI_API_VERSION,
    deployment: settings.AZURE_OPENAI_DEPLOYMENT_NAME,
    endpoint: settings.AZURE_OPENAI_ENDPOINT,
  });
  const realtimeClient = await OpenAIRealtimeWebSocket.azure(azureOpenAIClient);

  // Tie the returned promise to the lifetime of the session: it settles when the
  // socket closes (success) or when the client reports an error (failure).
  await new Promise<void>((resolve, reject) => {
    let finished = false;

    // Idempotent teardown: may be triggered by "response.done" and by "error".
    const shutdown = (): void => {
      if (finished) {
        return;
      }
      finished = true;
      speaker.end(); // flushes remaining audio
      realtimeClient.close();
    };

    realtimeClient.socket.addEventListener("open", () => {
      console.log("Connection opened!");
      // once the connection opens, initiate the conversation with the model
      realtimeClient.send({
        type: "session.update",
        session: {
          modalities: ["text", "audio"],
          model: "gpt-4o-mini-realtime-preview",
        },
      });
      realtimeClient.send({
        type: "conversation.item.create",
        item: {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text: "Please assist the user with their query." }],
        },
      });
      // prompt the model to respond to user query
      realtimeClient.send({ type: "response.create" });
    });

    // Throwing from inside an event listener would bypass the caller's `.catch()`
    // and surface as an uncaught exception; reject the promise instead and
    // release the speaker and the socket.
    realtimeClient.on("error", (err) => {
      reject(err);
      shutdown();
    });

    realtimeClient.on("session.created", (event) => {
      console.log("session created!", event.session);
      console.log();
    });

    realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));

    // whenever there's an incoming chunk of audio, pipe it to the speaker
    realtimeClient.on("response.audio.delta", (event) => {
      if (finished) {
        return; // the speaker has been ended; writing now would be a write-after-end
      }
      const buffer = Buffer.from(event.delta, "base64");
      console.log(`Received ${buffer.length} bytes of audio data.`);
      speaker.write(buffer);
    });

    realtimeClient.on("response.audio_transcript.delta", (event) => {
      console.log(`Received text delta:${event.delta}.`);
    });

    realtimeClient.on("response.text.done", () => console.log());

    // when the model is finished with the response, close the WebSocket connection with the model
    realtimeClient.on("response.done", shutdown);

    realtimeClient.socket.addEventListener("close", (event) => {
      console.log("\nConnection closed. Event = ", event);
      resolve(); // no-op if the promise was already rejected by the error handler
    });
  });
}
// Run the sample as soon as the module is loaded. A failure is logged and
// reflected in the process exit status so shells and CI can detect it.
main().catch((err: unknown) => {
  console.error("The sample encountered an error:", err);
  // Set the exit code rather than calling process.exit() so pending output is not cut off.
  process.exitCode = 1;
});

export { main };
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment