Created
November 23, 2024 20:47
-
-
Save dcaponi/2598474fa7a390986aacb7ffe36e06a8 to your computer and use it in GitHub Desktop.
Browser Audio Recording For STT
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import AudioRecorder from 'audio-recorder-polyfill'; | |
import * as lamejs from '@breezystack/lamejs'; | |
/** Recorder for the current session; stays null until startRecording() creates one. */
let mediaRecorder: MediaRecorder | null = null;
/** Blobs delivered by the recorder's `dataavailable` events for the current session. */
let recordedChunks: Blob[] = [];
/**
 * `Date.now()` timestamps (milliseconds) taken when recording starts and when the
 * recorder's `stop` event fires. Initialised to 0 so that an elapsed-time
 * subtraction can never produce NaN.
 */
let startTime = 0;
let stopTime = 0;
/**
 * Requests microphone access and starts capturing audio into the module-level
 * chunk buffer.
 *
 * @returns A promise that resolves once the recorder has been started.
 * @throws Error when a previous recording is still active; starting a second
 *   one would orphan the first recorder's live microphone stream.
 * @throws Whatever `getUserMedia` rejects with (for example when the user
 *   denies microphone permission).
 */
export const startRecording = async (): Promise<void> => {
  if (mediaRecorder && mediaRecorder.state !== 'inactive') {
    throw new Error('A recording is already in progress.');
  }

  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  try {
    const recorder: MediaRecorder = new AudioRecorder(stream);
    // Drop anything left over from a session that never reached its stop handler.
    recordedChunks = [];
    recorder.addEventListener('dataavailable', (event: BlobEvent) => {
      recordedChunks.push(event.data);
    });
    startTime = Date.now();
    recorder.start();
    // Publish the recorder only after it started successfully.
    mediaRecorder = recorder;
  } catch (error) {
    // Do not leave the microphone open if the recorder could not be set up.
    stream.getTracks().forEach((track) => track.stop());
    throw error;
  }
};
/**
 * Stops the active recording and resolves once the recorder has emitted its
 * `stop` event.
 *
 * @returns A tuple of the recorded audio blob and the elapsed time in
 *   milliseconds between `startRecording()` and the `stop` event.
 * @throws Error (as a rejection) when no recorder exists, when the recorder is
 *   already inactive, or when the recorder emits an `error` event.
 */
export const stopRecording = async (): Promise<[Blob, number]> => {
  const recorder = mediaRecorder;
  if (!recorder) throw new Error('MediaRecorder is not initialized.');
  // An inactive recorder never emits `stop`, so waiting for it would hang forever.
  if (recorder.state === 'inactive') throw new Error('MediaRecorder is not recording.');

  return new Promise<[Blob, number]>((resolve, reject) => {
    const releaseSession = (): void => {
      recorder.stream.getTracks().forEach((track) => track.stop());
      // Only clear the shared reference if no newer session has replaced it.
      if (mediaRecorder === recorder) mediaRecorder = null;
    };

    recorder.addEventListener(
      'stop',
      () => {
        releaseSession();
        // NOTE(review): audio-recorder-polyfill's default encoder appears to emit
        // WAV data, so this 'audio/mp3' label may not match the bytes — confirm
        // before relying on blob.type downstream.
        const audioBlob = new Blob(recordedChunks, { type: 'audio/mp3' });
        recordedChunks = [];
        stopTime = Date.now();
        resolve([audioBlob, stopTime - startTime]);
      },
      { once: true },
    );

    recorder.addEventListener(
      'error',
      (_event: Event) => {
        releaseSession();
        recordedChunks = [];
        reject(new Error('MediaRecorder reported an error while stopping.'));
      },
      { once: true },
    );

    recorder.stop();
  });
};
/**
 * Locates the sample data inside a RIFF/WAVE container.
 *
 * @returns The sample rate and a copy of the PCM bytes when the buffer is a
 *   16-bit mono PCM WAV file; `null` for anything else so the caller can fall
 *   back to treating the whole buffer as raw samples.
 */
const parseMonoPcmWav = (
  buffer: ArrayBuffer,
): { sampleRate: number; pcm: ArrayBuffer } | null => {
  if (buffer.byteLength < 12) return null;
  const view = new DataView(buffer);
  const tagAt = (offset: number): string =>
    String.fromCharCode(
      view.getUint8(offset),
      view.getUint8(offset + 1),
      view.getUint8(offset + 2),
      view.getUint8(offset + 3),
    );
  if (tagAt(0) !== 'RIFF' || tagAt(8) !== 'WAVE') return null;

  let sampleRate: number | null = null;
  let offset = 12;
  while (offset + 8 <= buffer.byteLength) {
    const chunkId = tagAt(offset);
    const chunkSize = view.getUint32(offset + 4, true);
    const bodyStart = offset + 8;

    if (chunkId === 'fmt ') {
      if (chunkSize < 16 || bodyStart + 16 > buffer.byteLength) return null;
      const format = view.getUint16(bodyStart, true);
      const channels = view.getUint16(bodyStart + 2, true);
      const bitsPerSample = view.getUint16(bodyStart + 14, true);
      // Only plain 16-bit mono PCM can be fed to the mono encoder unchanged.
      if (format !== 1 || channels !== 1 || bitsPerSample !== 16) return null;
      sampleRate = view.getUint32(bodyStart + 4, true);
      if (sampleRate <= 0) return null;
    } else if (chunkId === 'data') {
      if (sampleRate === null) return null;
      const bodyEnd = Math.min(bodyStart + chunkSize, buffer.byteLength);
      return { sampleRate, pcm: buffer.slice(bodyStart, bodyEnd) };
    }

    // RIFF chunks are padded to an even number of bytes.
    offset = bodyStart + chunkSize + (chunkSize % 2);
  }
  return null;
};

/**
 * Re-encodes recorded PCM audio as a 128 kbps mono MP3.
 *
 * When the blob is a 16-bit mono PCM WAV file, the header is skipped and its
 * sample rate is used. Otherwise the whole buffer is treated as raw 16-bit
 * mono samples at 44100 Hz, which is what this function always assumed.
 *
 * @param audioBlob Blob holding the recorded audio.
 * @returns A promise resolving to an `audio/mp3` blob.
 * @throws Error (as a rejection) when the blob cannot be read or encoding fails.
 */
export const compressAudioBlob = async (audioBlob: Blob): Promise<Blob> => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => {
      // Without this guard an exception here would leave the promise pending forever.
      try {
        const raw = reader.result as ArrayBuffer;
        const wav = parseMonoPcmWav(raw);

        let pcmBytes = wav ? wav.pcm : raw;
        if (pcmBytes.byteLength % 2 !== 0) {
          // Int16Array needs an even byte count; drop the dangling trailing byte.
          pcmBytes = pcmBytes.slice(0, pcmBytes.byteLength - 1);
        }
        const samples = new Int16Array(pcmBytes);

        // Mono channel, header sample rate (or 44100 Hz fallback), 128 kbps.
        const mp3Encoder = new lamejs.Mp3Encoder(1, wav ? wav.sampleRate : 44100, 128);
        const samplesPerFrame = 1152;
        const mp3Parts: BlobPart[] = [];

        for (let start = 0; start < samples.length; start += samplesPerFrame) {
          const encoded = mp3Encoder.encodeBuffer(samples.subarray(start, start + samplesPerFrame));
          if (encoded.length > 0) mp3Parts.push(encoded);
        }

        // flush() returns the final buffered frames; they must be part of the output.
        const tail = mp3Encoder.flush();
        if (tail.length > 0) mp3Parts.push(tail);

        resolve(new Blob(mp3Parts, { type: 'audio/mp3' }));
      } catch (error) {
        reject(error);
      }
    };

    reader.onerror = () => reject(reader.error ?? new Error('Failed to read the audio blob.'));
    reader.readAsArrayBuffer(audioBlob);
  });
};
/**
 * Callback tied to the record button. Toggles between starting a recording
 * and finishing one.
 *
 * - In the `idle` state it starts a recording and moves to `recording`.
 * - In the `recording` state it stops the recording, compresses the audio if it
 *   exceeds the size ceiling, and dispatches an `audio` event carrying the blob
 *   and the elapsed time. The state always returns to `idle` afterwards.
 *
 * Failures are logged and surfaced as an `error` event; this function does not
 * reject.
 */
const toggleRecording = async () => {
  // Size ceiling (25 MiB) for the audio handed to the `audio` event listener.
  const maxAudioBytes = 25 * 1024 * 1024;

  if (recordingState === 'idle') {
    try {
      await startRecording();
      recordingState = 'recording';
    } catch (error) {
      // For example the user denied microphone access; stay idle so they can retry.
      console.error('Error occurred while starting the recording:', error);
      dispatch('error', 'recording start error');
    }
    return;
  }

  if (recordingState === 'recording') {
    try {
      const [recordedBlob, elapsedTime] = await stopRecording();
      recordingState = 'transcribing';

      let audioBlob = recordedBlob;
      // Compress only when the recording is larger than the ceiling.
      if (audioBlob.size > maxAudioBytes) {
        audioBlob = await compressAudioBlob(audioBlob);
        if (audioBlob.size > maxAudioBytes)
          throw new Error(`[ERROR] Given answer is too long. Speaking time: ${elapsedTime}`);
      }

      // dispatch is a sveltekit thing. You can also simply ship the mp3 to your
      // server side code and pass it to your STT vendor.
      dispatch('audio', { audioBlob, elapsedTime });
    } catch (error) {
      // NOTE(review): every failure on this path is reported as 'long answer error',
      // including stop/compression failures — confirm whether listeners rely on that.
      console.error('Error occurred during transcription:', error);
      dispatch('error', 'long answer error');
    } finally {
      recordingState = 'idle';
    }
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment