Skip to content

Instantly share code, notes, and snippets.

@schappim
Created December 18, 2024 02:31
Show Gist options
  • Save schappim/81b7aecd5addd5e632550ce924aa474f to your computer and use it in GitHub Desktop.
Save schappim/81b7aecd5addd5e632550ce924aa474f to your computer and use it in GitHub Desktop.
OpenAI’s Real-time API (with WebRTC) using StimulusJS and Rails
<%# A partial that can be placed anywhere to utilise the Real-time API (with WebRTC). %>
<%# The wrapper binds the "webrtc" Stimulus controller, which reads the audio, toggle and spinner targets below. %>
<div data-controller="webrtc" class="bg-gray-600 rounded-full text-white mb-4 ">
  <%# Playback element: the controller assigns the remote stream to it and turns autoplay on. %>
  <audio data-webrtc-target="audio"></audio>
  <%# type="button" stops the toggle from submitting a surrounding form when this partial is rendered inside one. %>
  <button type="button" data-webrtc-target="toggle" data-action="click->webrtc#toggleAudio" class="relative flex items-center justify-center w-full font-medium min-w-[150px] min-h-[40px]">
    <%# Label element: the controller finds it via [data-button-text] and rewrites its text. %>
    <div class="w-full text-center" data-button-text>Enable Audio Chat</div>
    <%# Spinner: the controller removes/adds the "hidden" class while a connection is being set up. %>
    <div data-webrtc-target="spinner" class="hidden absolute">
      <svg class="animate-spin h-5 w-5" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
        <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
        <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
      </svg>
    </div>
  </button>
</div>
# app/controllers/openai_sessions_controller.rb
# Creates OpenAI Realtime sessions on behalf of the browser so that the
# long-lived OPENAI_API_KEY never leaves the server. The JSON returned by
# OpenAI (which the front end reads `client_secret.value` from) is relayed
# to the caller.
class OpenaiSessionsController < ApplicationController
  # Requests a new Realtime session from OpenAI and renders the upstream JSON.
  #
  # The upstream HTTP status is passed through so the caller can distinguish a
  # usable session from a rejected request. If OpenAI cannot be reached, or its
  # body is not valid JSON, a 502 with a small JSON error object is rendered
  # instead of letting the exception escape.
  def create
    response = HTTParty.post(
      "https://api.openai.com/v1/realtime/sessions",
      headers: {
        "Authorization" => "Bearer #{ENV["OPENAI_API_KEY"]}",
        "Content-Type" => "application/json"
      },
      body: {
        # model: "gpt-4o-realtime-preview-2024-12-17",
        model: "gpt-4o-mini-realtime-preview-2024-12-17",
        # NOTE: the line break inside this string is part of the prompt text,
        # so the continuation line must stay unindented.
        instructions: "You are STEM teacher who is teaching about Pi-hole you only answer questions about Pi-hole on Raspberry Pi and those directly related to getting it running on Raspberry Pi.
You keep the answers super-concise and to the point. You don't need to explain things in detail, just give the answer. Politely reject non-pi-hole on Raspberry Pi questions.",
        voice: "ash"
      }.to_json
    )

    # Relay the upstream status instead of always answering 200.
    render json: JSON.parse(response.body), status: response.code
  rescue JSON::ParserError, HTTParty::Error, SocketError, SystemCallError, Timeout::Error => e
    logger.error("OpenAI realtime session request failed: #{e.class}: #{e.message}")
    render json: { error: "Unable to create OpenAI realtime session" }, status: :bad_gateway
  end
end
// app/javascript/controllers/webrtc_controller.js
import { Controller } from "@hotwired/stimulus";
// Global handle that lets code outside the Stimulus controller talk to the
// active WebRTC data channel. The controller fills in `dataChannel` and
// `isConnected` when the channel opens and clears them when it closes.
// `globalThis` is the same object as `window` in browsers, so existing
// `window.WebRTCEvents` references keep working.
globalThis.WebRTCEvents = {
  dataChannel: null,
  isConnected: false,

  /**
   * Serializes `message` as JSON and sends it over the data channel.
   *
   * @param {*} message - JSON-serializable payload.
   * @returns {boolean} `true` if the message was handed to the channel,
   *   `false` if there is no open channel or sending failed. Never throws.
   */
  sendMessage: (message) => {
    const { dataChannel, isConnected } = globalThis.WebRTCEvents;

    // `isConnected` only flips on the open/close events, so also check
    // readyState: a channel that is "closing" would make send() throw.
    if (!dataChannel || !isConnected || dataChannel.readyState !== "open") {
      console.warn("WebRTC data channel not connected");
      return false;
    }

    try {
      dataChannel.send(JSON.stringify(message));
      return true;
    } catch (error) {
      // Covers both non-serializable payloads and send() failures.
      console.error("Failed to send WebRTC message:", error);
      return false;
    }
  },
};
/**
 * Stimulus controller that toggles a voice chat with OpenAI's Realtime API
 * over WebRTC.
 *
 * Targets:
 *  - `audio`   element that plays the remote stream
 *  - `toggle`  button that starts/stops the chat; must contain a
 *              `[data-button-text]` element for the label
 *  - `spinner` element shown (by removing the `hidden` class) while connecting
 *
 * While a data channel is open it is published on `window.WebRTCEvents`, and
 * every parsed message is re-dispatched on `window` as a `webrtc-message`
 * CustomEvent.
 */
export default class extends Controller {
  static targets = ["audio", "toggle", "spinner"];
  static classes = ["loading"];
  static STORAGE_KEY = "openai_ephemeral_key";
  static REALTIME_URL = "https://api.openai.com/v1/realtime";
  // Used for the SDP exchange only when the session response names no model.
  static DEFAULT_MODEL = "gpt-4o-realtime-preview-2024-12-17";
  // Treat a key as expired slightly early so it cannot lapse mid-handshake.
  static KEY_EXPIRY_MARGIN_MS = 5000;

  isActive = false;
  isLoading = false;
  ephemeralKey = null;
  // Epoch milliseconds after which `ephemeralKey` must not be reused.
  ephemeralKeyExpiresAt = 0;
  // Model reported by the session endpoint, if any.
  sessionModel = null;

  /** Stimulus lifecycle hook: sync the label and start without a cached key. */
  connect() {
    this.updateButtonText();
    this.forgetEphemeralKey();
  }

  /** Drops the in-memory key and the session details that came with it. */
  forgetEphemeralKey() {
    this.ephemeralKey = null;
    this.ephemeralKeyExpiresAt = 0;
    this.sessionModel = null;
  }

  /**
   * Returns an ephemeral key for the Realtime API, requesting a new session
   * from the application server when no unexpired key is cached.
   *
   * The key is kept in memory only: it was never read back from localStorage,
   * so persisting the credential there served no purpose.
   *
   * @returns {Promise<string>} the `client_secret.value` of the session.
   * @throws {Error} if the request fails (the error carries a numeric
   *   `status`) or the response lacks `client_secret.value`.
   */
  async getEphemeralKey() {
    const { KEY_EXPIRY_MARGIN_MS } = this.constructor;
    const isStillValid =
      Boolean(this.ephemeralKey) &&
      Date.now() < this.ephemeralKeyExpiresAt - KEY_EXPIRY_MARGIN_MS;
    if (isStillValid) {
      return this.ephemeralKey;
    }

    // NOTE(review): this is a GET, while the Rails action behind this path is
    // named `create`; confirm the route really accepts GET.
    const tokenResponse = await fetch("/openai-sessions");
    if (!tokenResponse.ok) {
      throw Object.assign(
        new Error(`Session request failed with status ${tokenResponse.status}`),
        { status: tokenResponse.status },
      );
    }

    const data = await tokenResponse.json();
    const secret = data?.client_secret;
    if (!secret?.value) {
      throw new Error("Session response did not include client_secret.value");
    }

    this.ephemeralKey = secret.value;
    // `expires_at` is documented by OpenAI as a Unix timestamp in seconds.
    // When it is missing or not numeric the key is simply never reused.
    this.ephemeralKeyExpiresAt = Number(secret.expires_at ?? 0) * 1000;
    // Remember which model the session was created with so the SDP exchange
    // below asks for the same one.
    this.sessionModel = data.model ?? null;
    return this.ephemeralKey;
  }

  /**
   * Creates the data channel on the current peer connection and wires its
   * open/close/message handlers to `window.WebRTCEvents` and `window`.
   */
  setUpDataChannel() {
    const channel = this.pc.createDataChannel("oai-events");
    this.dataChannel = channel;

    channel.onopen = () => {
      console.log("Data channel opened");
      window.WebRTCEvents.isConnected = true;
      window.WebRTCEvents.dataChannel = channel;
    };

    channel.onclose = () => {
      console.log("Data channel closed");
      // A late close event from a previous channel must not wipe the state
      // published by a newer one.
      const published = window.WebRTCEvents.dataChannel;
      if (published === null || published === channel) {
        window.WebRTCEvents.isConnected = false;
        window.WebRTCEvents.dataChannel = null;
      }
    };

    channel.onmessage = (event) => {
      try {
        const realtimeEvent = JSON.parse(event.data);
        console.log("Received message:", realtimeEvent);
        // Dispatch a custom event that others can listen to
        window.dispatchEvent(
          new CustomEvent("webrtc-message", {
            detail: realtimeEvent,
          }),
        );
      } catch (error) {
        console.error("Error parsing WebRTC message:", error);
      }
    };
  }

  /**
   * Opens the microphone, builds the peer connection and performs the SDP
   * offer/answer exchange with OpenAI. Sets `isActive` on success. Failures
   * are logged and everything acquired so far is released; this method does
   * not reject.
   */
  async initializeWebRTC() {
    try {
      this.setLoadingState(true);
      const ephemeralKey = await this.getEphemeralKey();

      this.pc = new RTCPeerConnection();
      this.audioTarget.autoplay = true;
      this.audioTarget.controls = false;
      this.pc.ontrack = (event) => {
        this.audioTarget.srcObject = event.streams[0];
      };

      const mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: true,
      });
      this.mediaStream = mediaStream;
      this.localAudioTrack = mediaStream.getAudioTracks()[0];
      this.pc.addTrack(this.localAudioTrack, mediaStream);

      this.setUpDataChannel();

      const offer = await this.pc.createOffer();
      await this.pc.setLocalDescription(offer);

      const { REALTIME_URL, DEFAULT_MODEL } = this.constructor;
      const sdpUrl = new URL(REALTIME_URL);
      sdpUrl.searchParams.set("model", this.sessionModel ?? DEFAULT_MODEL);

      const sdpResponse = await fetch(sdpUrl, {
        method: "POST",
        body: offer.sdp,
        headers: {
          Authorization: `Bearer ${ephemeralKey}`,
          "Content-Type": "application/sdp",
        },
      });
      // Without this check an error body would be fed to
      // setRemoteDescription and the auth-failure branch below never ran.
      if (!sdpResponse.ok) {
        throw Object.assign(
          new Error(`SDP exchange failed with status ${sdpResponse.status}`),
          { status: sdpResponse.status },
        );
      }

      await this.pc.setRemoteDescription({
        type: "answer",
        sdp: await sdpResponse.text(),
      });
      this.isActive = true;
    } catch (error) {
      console.error("Failed to initialize WebRTC:", error);
      this.cleanup();
      if (error?.status === 401 || error?.status === 403) {
        // The key was rejected: drop it so the next attempt requests a fresh
        // one, and purge any copy an earlier version left in localStorage.
        this.forgetEphemeralKey();
        localStorage.removeItem(this.constructor.STORAGE_KEY);
      }
    } finally {
      this.setLoadingState(false);
    }
  }

  /**
   * Switches the toggle between its busy (disabled, spinner shown) and idle
   * (enabled, label shown) presentation.
   *
   * @param {boolean} loading - `true` while a connection attempt is running.
   */
  setLoadingState(loading) {
    this.isLoading = loading;
    this.toggleTarget.disabled = loading;

    const label = this.toggleTarget.querySelector("[data-button-text]");
    label.classList.toggle("hidden", loading);
    this.spinnerTarget.classList.toggle("hidden", !loading);

    if (!loading) {
      this.updateButtonText();
    }
  }

  /** Sets the toggle label according to `isActive`. */
  updateButtonText() {
    const buttonText = this.toggleTarget.querySelector("[data-button-text]");
    buttonText.textContent = this.isActive
      ? "Disable Audio Chat"
      : "Enable Audio Chat";
  }

  /**
   * Releases the peer connection, data channel, microphone tracks and audio
   * element, and clears the state published on `window.WebRTCEvents`.
   * Safe to call repeatedly.
   */
  cleanup() {
    if (this.pc) {
      this.dataChannel?.close();
      this.pc.close();
      this.pc = null;
    }
    this.dataChannel = null;

    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => {
        track.stop();
      });
      this.mediaStream = null;
    }

    // Stimulus throws when a missing target is read through `audioTarget`,
    // so existence has to be checked with the `has…Target` property.
    if (this.hasAudioTarget) {
      this.audioTarget.srcObject = null;
      this.audioTarget.pause();
    }

    this.localAudioTrack = null;
    this.isActive = false;

    // Clear the global references
    window.WebRTCEvents.isConnected = false;
    window.WebRTCEvents.dataChannel = null;
  }

  /** Click handler for the toggle: starts the chat or tears it down. */
  async toggleAudio() {
    if (this.isLoading) return;

    if (this.isActive) {
      this.cleanup();
    } else {
      await this.initializeWebRTC();
    }
    this.updateButtonText();
  }

  /** Stimulus lifecycle hook: release everything and forget the key. */
  disconnect() {
    this.cleanup();
    this.forgetEphemeralKey();
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment