@benkant
Created April 16, 2025 16:22
AudioWorklet processor: capture microphone audio in the browser and stream it to OpenAI's realtime API as base64-encoded 16-bit PCM.
// audio-processor.js
// This AudioWorkletProcessor receives audio input and sends the raw Float32 data to the main thread.
class AudioProcessor extends AudioWorkletProcessor {
  process(inputs, outputs, parameters) {
    // 'inputs' is an array of arrays; assume the first input and first channel (mono input).
    if (inputs.length > 0 && inputs[0].length > 0) {
      const channelData = inputs[0][0]; // This is a Float32Array of audio samples.
      // Copy the data: the engine may reuse this buffer between render quanta.
      const audioChunk = new Float32Array(channelData);
      // Post the audio chunk to the main thread.
      this.port.postMessage(audioChunk);
    }
    // Returning true keeps the processor alive.
    return true;
  }
}
registerProcessor('audio-processor', AudioProcessor);
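
The processor above posts a message for every 128-sample render quantum (about 5 ms at 24 kHz), which can flood the message port. A common refinement is to accumulate samples inside the processor and post larger chunks. A minimal sketch follows; the 2048-sample buffer size and the 'audio-processor-buffered' name are assumptions, not part of the original gist.

// audio-processor-buffered.js (hypothetical variant, not part of the original gist)
class BufferedAudioProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.bufferSize = 2048; // assumed chunk size; tune for latency vs. message rate
    this.buffer = new Float32Array(this.bufferSize);
    this.offset = 0;
  }
  process(inputs) {
    const input = inputs.length > 0 && inputs[0].length > 0 ? inputs[0][0] : null;
    if (input) {
      for (let i = 0; i < input.length; i++) {
        this.buffer[this.offset++] = input[i];
        if (this.offset === this.bufferSize) {
          // Post a copy so the processor can keep reusing its own buffer.
          this.port.postMessage(this.buffer.slice());
          this.offset = 0;
        }
      }
    }
    // Returning true keeps the processor alive.
    return true;
  }
}
registerProcessor('audio-processor-buffered', BufferedAudioProcessor);
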
// main.js
// Open a WebSocket connection to OpenAI's realtime endpoint.
const OPENAI_API_KEY = 'sk-XXXXXXXXXXXXXXXXXXXXXXXX'; // Replace with your key.
const MODEL_ID = 'gpt-4o-realtime-preview-2024-10-01';
// Browsers do not allow custom headers on WebSocket connections, so a
// Node-style `{ headers: { Authorization: ... } }` option has no effect here.
// For production, proxy the connection through your own server so the key
// never reaches the client; for local testing, OpenAI's beta docs describe
// passing the key via WebSocket subprotocols as below.
const ws = new WebSocket(`wss://api.openai.com/v1/realtime?model=${MODEL_ID}`, [
  'realtime',
  `openai-insecure-api-key.${OPENAI_API_KEY}`,
  'openai-beta.realtime-v1'
]);
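
Before streaming, the session usually needs to know what audio format to expect. A minimal sketch of configuring it once the socket opens, assuming the session.update event shape from the realtime API beta (field names may differ between API versions):

ws.onopen = () => {
  // Tell the session to expect 16-bit PCM input; 'pcm16' follows the
  // realtime API beta docs and may change in later versions.
  ws.send(JSON.stringify({
    type: 'session.update',
    session: { input_audio_format: 'pcm16' }
  }));
};
ws.onmessage = (event) => {
  // Log server events while developing; real code would dispatch on msg.type.
  const msg = JSON.parse(event.data);
  console.log('Server event:', msg.type);
};
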
// This function sets up microphone capture, connects it to an AudioWorklet, and sends audio data.
async function initAudioWorkletAndMic() {
  try {
    // Request access to the microphone.
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    // Create an AudioContext; sampleRate may be adjusted per API requirements.
    const audioContext = new AudioContext({ sampleRate: 24000 });
    // Load the AudioWorklet module.
    await audioContext.audioWorklet.addModule('audio-processor.js');
    // Create a MediaStreamSource node from the microphone stream.
    const sourceNode = audioContext.createMediaStreamSource(stream);
    // Create an instance of the AudioWorkletNode using our "audio-processor".
    const audioWorkletNode = new AudioWorkletNode(audioContext, 'audio-processor');
    // Optionally, connect to the destination if you want to hear your own audio:
    // audioWorkletNode.connect(audioContext.destination);
    // Listen for messages from the AudioWorklet (each message is a Float32Array chunk).
    audioWorkletNode.port.onmessage = (event) => {
      const float32Array = event.data; // Audio chunk as Float32Array.
      // Convert from 32-bit float (range -1 to 1) to 16-bit PCM.
      const int16Array = new Int16Array(float32Array.length);
      for (let i = 0; i < float32Array.length; i++) {
        // Clamp the value just in case and convert to 16-bit.
        const sample = Math.max(-1, Math.min(1, float32Array[i]));
        int16Array[i] = sample < 0 ? sample * 32768 : sample * 32767;
      }
      // btoa() only accepts characters in the 0-255 range, so view the
      // Int16Array's underlying buffer as individual bytes before encoding.
      const bytes = new Uint8Array(int16Array.buffer);
      let binaryString = "";
      for (let i = 0; i < bytes.length; i++) {
        binaryString += String.fromCharCode(bytes[i]);
      }
      // Base64 encode the binary string (little-endian 16-bit PCM).
      const base64Audio = btoa(binaryString);
      // Package the audio data per OpenAI's expected event structure.
      const audioMessage = {
        type: 'input_audio_buffer.append',
        audio: base64Audio
      };
      // Send the audio message over the WebSocket if it is open.
      if (ws.readyState === WebSocket.OPEN) {
        ws.send(JSON.stringify(audioMessage));
      }
    };
    // Connect the audio source to the worklet node.
    sourceNode.connect(audioWorkletNode);
  } catch (err) {
    console.error('Error initializing audio capture and worklet:', err);
  }
}
// Initialize microphone capture and AudioWorklet.
initAudioWorkletAndMic();
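
The code above appends audio indefinitely. When server-side voice activity detection is not in use, the client must mark the end of an utterance itself. A minimal sketch, assuming the input_audio_buffer.commit and response.create event types from the realtime API beta; wiring endUtterance to a push-to-talk release is left to the application.

// Hypothetical helper: call when the user stops speaking.
function endUtterance() {
  if (ws.readyState !== WebSocket.OPEN) return;
  // Finalize the buffered audio into a user message...
  ws.send(JSON.stringify({ type: 'input_audio_buffer.commit' }));
  // ...then ask the model to generate a response.
  ws.send(JSON.stringify({ type: 'response.create' }));
}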