AudioWorklet processor that streams microphone audio to the OpenAI realtime API
// audio-processor.js
// This AudioWorkletProcessor receives audio input and sends the raw Float32 data to the main thread.
class AudioProcessor extends AudioWorkletProcessor {
  process(inputs, outputs, parameters) {
    // 'inputs' is an array of arrays; assume the first input and first channel (mono input).
    if (inputs.length > 0 && inputs[0].length > 0) {
      const channelData = inputs[0][0]; // This is a Float32Array of audio samples.
      // Copy the data: the engine reuses the underlying buffer between process() calls.
      const audioChunk = new Float32Array(channelData);
      // Post the audio chunk to the main thread.
      this.port.postMessage(audioChunk);
    }
    // Returning true keeps the processor alive.
    return true;
  }
}

registerProcessor('audio-processor', AudioProcessor);
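process() runs once per render quantum (128 frames under the current Web Audio spec, about 5 ms at 24 kHz), so the processor above posts a message roughly every 5 ms. If that message rate is a concern, a variant can accumulate several quanta and post larger chunks. A minimal sketch, not part of the original gist: the 'audio-batcher' name and the 2048-sample buffer size are choices made here for illustration, and the code assumes the default 128-frame quantum (which divides the buffer size evenly).

// audio-batcher.js (hypothetical variant, not in the gist above)
// Accumulates 128-frame render quanta and posts one larger chunk at a time,
// trading a little latency for far fewer port messages.
class AudioBatcher extends AudioWorkletProcessor {
  constructor() {
    super();
    this.buffer = new Float32Array(2048); // ~85 ms of mono audio at 24 kHz.
    this.offset = 0;
  }
  process(inputs, outputs, parameters) {
    if (inputs.length > 0 && inputs[0].length > 0) {
      const channelData = inputs[0][0];
      this.buffer.set(channelData, this.offset);
      this.offset += channelData.length;
      // Flush once the buffer is full; slice() posts a copy so the
      // reusable buffer can be refilled immediately.
      if (this.offset >= this.buffer.length) {
        this.port.postMessage(this.buffer.slice(0));
        this.offset = 0;
      }
    }
    return true;
  }
}

registerProcessor('audio-batcher', AudioBatcher);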
// main.js
// Open a WebSocket connection to OpenAI's realtime endpoint.
const OPENAI_API_KEY = 'sk-XXXXXXXXXXXXXXXXXXXXXXXX'; // Replace with your key.
const MODEL_ID = 'gpt-4o-realtime-preview-2024-10-01';

// Note: the browser WebSocket constructor only accepts (url, protocols) and
// silently ignores the options object below, so the Authorization header is
// never sent from a browser. The three-argument form shown here works with
// Node's 'ws' package; in the browser, authenticate through a server-side
// proxy instead (see the Node sketch at the end of this gist).
const ws = new WebSocket(`wss://api.openai.com/v1/realtime?model=${MODEL_ID}`, [], {
  headers: {
    'Authorization': `Bearer ${OPENAI_API_KEY}`,
    'OpenAI-Beta': 'realtime=v1'
  }
});

// This function sets up microphone capture, connects it to an AudioWorklet, and sends audio data.
async function initAudioWorkletAndMic() {
  try {
    // Request access to the microphone.
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    // Create an AudioContext; sampleRate may be adjusted per API requirements.
    const audioContext = new AudioContext({ sampleRate: 24000 });
    // Load the AudioWorklet module.
    await audioContext.audioWorklet.addModule('audio-processor.js');
    // Create a MediaStreamSource node from the microphone stream.
    const sourceNode = audioContext.createMediaStreamSource(stream);
    // Create an instance of the AudioWorkletNode using our "audio-processor".
    const audioWorkletNode = new AudioWorkletNode(audioContext, 'audio-processor');
    // Optionally, connect it to the destination if you want to hear your own audio:
    // audioWorkletNode.connect(audioContext.destination);

    // Listen for messages from the AudioWorklet (each message is a Float32Array chunk).
    audioWorkletNode.port.onmessage = (event) => {
      const float32Array = event.data; // Audio chunk as Float32Array.
      // Convert from 32-bit float (range -1 to 1) to 16-bit PCM.
      const int16Array = new Int16Array(float32Array.length);
      for (let i = 0; i < float32Array.length; i++) {
        // Clamp the value just in case and scale to the 16-bit range.
        const sample = Math.max(-1, Math.min(1, float32Array[i]));
        int16Array[i] = sample < 0 ? sample * 32768 : sample * 32767;
      }
      // Reinterpret the samples as raw bytes (little-endian on mainstream
      // platforms, the byte order 16-bit PCM expects). Building the binary
      // string byte by byte keeps every code point in the 0-255 range that
      // btoa() accepts; calling String.fromCharCode on the 16-bit samples
      // directly would make btoa() throw on most real audio.
      const bytes = new Uint8Array(int16Array.buffer);
      let binaryString = '';
      for (let i = 0; i < bytes.length; i++) {
        binaryString += String.fromCharCode(bytes[i]);
      }
      // Base64 encode the binary string.
      const base64Audio = btoa(binaryString);
      // Package the audio data per OpenAI's expected event structure.
      const audioMessage = {
        type: 'input_audio_buffer.append',
        audio: base64Audio
      };
      // Send the audio message over the WebSocket if it is open.
      if (ws.readyState === WebSocket.OPEN) {
        ws.send(JSON.stringify(audioMessage));
      }
    };

    // Connect the audio source to the worklet node.
    sourceNode.connect(audioWorkletNode);
  } catch (err) {
    console.error('Error initializing audio capture and worklet:', err);
  }
}

// Initialize microphone capture and AudioWorklet.
initAudioWorkletAndMic();
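The same socket carries the model's reply. A minimal sketch of the receive side, assuming the realtime API's server event schema (base64 PCM16 audio arriving as 'response.audio.delta' events with a 'delta' field); 'playbackContext' and 'playbackTime' are names introduced here for illustration:

// Playback sketch: decode 'response.audio.delta' events and schedule them back to back.
// Note: a suspended AudioContext may need playbackContext.resume() after a user gesture.
const playbackContext = new AudioContext({ sampleRate: 24000 });
let playbackTime = 0;

ws.onmessage = (event) => {
  const msg = JSON.parse(event.data);
  if (msg.type === 'response.audio.delta') {
    // Base64 -> bytes -> Int16 samples (reversing the encoding used when sending).
    const binaryString = atob(msg.delta);
    const bytes = new Uint8Array(binaryString.length);
    for (let i = 0; i < bytes.length; i++) {
      bytes[i] = binaryString.charCodeAt(i);
    }
    const int16Array = new Int16Array(bytes.buffer);
    // Int16 PCM -> Float32 in the -1..1 range.
    const float32Array = new Float32Array(int16Array.length);
    for (let i = 0; i < int16Array.length; i++) {
      float32Array[i] = int16Array[i] / 32768;
    }
    // Wrap the samples in an AudioBuffer and queue it after any pending audio.
    const audioBuffer = playbackContext.createBuffer(1, float32Array.length, 24000);
    audioBuffer.copyToChannel(float32Array, 0);
    const source = playbackContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(playbackContext.destination);
    playbackTime = Math.max(playbackTime, playbackContext.currentTime);
    source.start(playbackTime);
    playbackTime += audioBuffer.duration;
  }
};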
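Because the browser cannot attach the Authorization header (see the note in main.js), production setups typically terminate the client connection on their own server and relay frames to OpenAI. A minimal Node sketch using the 'ws' npm package; PROXY_PORT and the relay logic are illustrative, not from the gist:

// proxy.js (sketch): relay browser WebSocket frames to OpenAI, keeping the
// API key server-side. Requires the 'ws' npm package.
const { WebSocket, WebSocketServer } = require('ws');

const PROXY_PORT = 8080;
const MODEL_ID = 'gpt-4o-realtime-preview-2024-10-01';

const server = new WebSocketServer({ port: PROXY_PORT });

server.on('connection', (client) => {
  // One upstream connection per browser client, authenticated with the
  // headers the browser itself cannot set.
  const upstream = new WebSocket(`wss://api.openai.com/v1/realtime?model=${MODEL_ID}`, {
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'OpenAI-Beta': 'realtime=v1'
    }
  });

  // Buffer client messages until the upstream socket opens, then relay both ways.
  const pending = [];
  client.on('message', (data) => {
    if (upstream.readyState === WebSocket.OPEN) {
      upstream.send(data);
    } else {
      pending.push(data);
    }
  });
  upstream.on('open', () => {
    for (const data of pending) upstream.send(data);
    pending.length = 0;
  });
  upstream.on('message', (data) => client.send(data.toString()));

  // Tear down the pair together.
  client.on('close', () => upstream.close());
  upstream.on('close', () => client.close());
});

The browser then connects to ws://localhost:8080 (or wherever the proxy runs) with no credentials, and the rest of main.js works unchanged.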