hughrawlinson/df9f6d8f2f543a78433dd4381203c3ce
Created October 23, 2023 16:33
A voice-activated audio recorder for the browser, built on MediaRecorder, a typed EventTarget, and a vendored Web Audio voice activity detector.

index.ts
export * from "./Recorder";
export * from "./RecordingState";
export * from "./VoiceRecorderEventTarget";

MediaRecorderWithStorage.ts
export class MediaRecorderWithStorage {
  #mediaRecorder: MediaRecorder;
  #blobs: Blob[] = [];
  #blobOptions?: BlobPropertyBag;

  constructor(
    stream: MediaStream,
    mediaRecorderOptions: MediaRecorderOptions,
    blobOptions?: BlobPropertyBag
  ) {
    this.#mediaRecorder = new MediaRecorder(stream, mediaRecorderOptions);
    this.#blobOptions = blobOptions;
    // Collect each chunk as it becomes available so the full recording can
    // be assembled into a single Blob on stop().
    this.#mediaRecorder.addEventListener("dataavailable", e => {
      this.#blobs.push(e.data);
    });
    // Emit a chunk every second.
    this.#mediaRecorder.start(1000);
  }

  stop(recordingHandler?: (recording: Blob) => void) {
    if (this.#mediaRecorder.state === "inactive") {
      // Already stopped: hand back the stored chunks immediately. Calling
      // stop() again would throw an InvalidStateError.
      recordingHandler?.(new Blob(this.#blobs, this.#blobOptions));
      return;
    }
    if (recordingHandler) {
      this.#mediaRecorder.addEventListener(
        "stop",
        () => recordingHandler(new Blob(this.#blobs, this.#blobOptions)),
        { once: true }
      );
    }
    this.#mediaRecorder.stop();
  }
}
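
A minimal usage sketch, not part of the gist: it assumes a `stream` already obtained from `getUserMedia`, and that the chosen MIME type is supported in your browser.

// Hypothetical usage, assuming `stream` is a microphone MediaStream:
const recorder = new MediaRecorderWithStorage(
  stream,
  { mimeType: "audio/webm; codecs=opus" },
  { type: "audio/webm; codecs=opus" }
);
// Later, stop and collect everything recorded so far as one Blob:
recorder.stop(blob => console.log(`captured ${blob.size} bytes`));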

PreRecorder.ts
import { MediaRecorderWithStorage } from "./MediaRecorderWithStorage";

export class PreRecorder {
  #holdRecording = false;
  #stream: MediaStream;
  #mediaRecorders: MediaRecorderWithStorage[];
  #mediaRecorderInitOptions: MediaRecorderOptions;
  #blobOptions: BlobPropertyBag | undefined;
  #recordingHandler: (blob: Blob) => void;
  #flipInterval: ReturnType<typeof setInterval>;

  constructor(
    flipTime: number,
    stream: MediaStream,
    onRecordingAvailable: (blob: Blob) => void,
    mediaRecorderOptions: MediaRecorderOptions,
    blobOptions?: BlobPropertyBag
  ) {
    this.#stream = stream;
    this.#mediaRecorderInitOptions = mediaRecorderOptions;
    this.#blobOptions = blobOptions;
    // Two staggered recorders: after startup, the older one always holds
    // between one and two flip intervals of audio, which becomes the
    // pre-roll when a recording starts mid-interval.
    this.#mediaRecorders = [
      new MediaRecorderWithStorage(
        stream,
        this.#mediaRecorderInitOptions,
        this.#blobOptions
      ),
      new MediaRecorderWithStorage(
        stream,
        this.#mediaRecorderInitOptions,
        this.#blobOptions
      ),
    ];
    this.#recordingHandler = onRecordingAvailable;
    this.#flipInterval = setInterval(() => {
      this.#flip();
    }, flipTime);
  }

  hold = () => {
    this.#holdRecording = true;
  };

  release = () => {
    this.#mediaRecorders[0].stop(recording => {
      this.#holdRecording = false;
      this.#recordingHandler(recording);
    });
  };

  destroy = () => {
    // Stop the flip cycle and any running recorders so nothing leaks.
    clearInterval(this.#flipInterval);
    this.#mediaRecorders.forEach(recorder => recorder.stop());
  };

  #flip = () => {
    if (this.#holdRecording) {
      // A recording is in progress; keep the current recorders running so
      // no audio is lost.
      return;
    }
    const oldRecorder = this.#mediaRecorders.shift();
    oldRecorder?.stop();
    this.#mediaRecorders.push(
      new MediaRecorderWithStorage(
        this.#stream,
        this.#mediaRecorderInitOptions,
        this.#blobOptions
      )
    );
  };
}
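
How the pre-roll works: every flip the older recorder is discarded and a fresh one starts, so the older recorder always holds between one and two flip intervals of history. `hold()` freezes the cycle when voice is detected; `release()` stops the older recorder, whose blob therefore includes audio from before the voice onset. A hedged sketch, assuming a hypothetical `stream` from getUserMedia:

// Hypothetical wiring:
const preRecorder = new PreRecorder(
  500, // flip every 500 ms
  stream,
  blob => console.log("recording ready", blob),
  { mimeType: "audio/webm; codecs=opus" }
);
preRecorder.hold();    // voice detected: stop discarding history
// ...speech happens...
preRecorder.release(); // voice ended: emit a blob with 0.5-1 s of pre-roll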

Recorder.ts
import { PreRecorder } from "./PreRecorder";
import { RecordingState } from "./RecordingState";
import {
  typedVoiceRecorderEventTarget,
  ErrorEvent,
  RecordingStartedEvent,
  RecordingCompleteEvent,
} from "./VoiceRecorderEventTarget";
import vad from "./voice-activity-detector";

const OGG = "audio/ogg";
const WEBM = "audio/webm; codecs=opus";
const PCM = "audio/webm; codecs=pcm";
const UNSUPPORTED = "UNSUPPORTED";

// Pick the first container/codec combination this browser's MediaRecorder
// supports.
const AUDIO_CODEC = MediaRecorder.isTypeSupported(OGG)
  ? OGG
  : MediaRecorder.isTypeSupported(WEBM)
  ? WEBM
  : MediaRecorder.isTypeSupported(PCM)
  ? PCM
  : UNSUPPORTED;

const FLIP_TIME = 500;

export class VoiceRecorder extends typedVoiceRecorderEventTarget {
  recording: RecordingState = RecordingState.new();
  #debounceTime = 1;
  #audioContext: AudioContext;
  #vadControl: ReturnType<typeof vad> | undefined;
  #mediaStream: MediaStream | undefined;
  #preRecorder: PreRecorder | undefined;
  #starts: number[] = [];
  #stops: number[] = [];
  // TODO: Allow device selection
  #devices: MediaDeviceInfo[];

  /**
   * Prepare a voice-activated audio recorder.
   *
   * If you are in an environment that requires user permission for microphone
   * access, you must call `requestMicrophonePermission()`.
   *
   * You must call `start()` to start receiving events.
   *
   * @param debounceTime margin between end of voice activity and end of recording, in seconds
   * @param audioContext provide an audio context. If you don't, one will be created, but then you won't be able to provide a source
   */
  constructor(debounceTime?: number, audioContext?: AudioContext) {
    super();
    if (AUDIO_CODEC === UNSUPPORTED) {
      throw new Error(
        "MediaRecorder doesn't support any of the accepted audio codecs in this environment"
      );
    }
    this.#debounceTime = debounceTime ?? this.#debounceTime;
    this.#audioContext = audioContext ?? new AudioContext();
    this.#devices = [];
  }

  /**
   * Request user permission to access the microphone, and set up the
   * pre-recorder on the resulting stream.
   */
  requestMicrophonePermission = async () => {
    try {
      this.#mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: true,
      });
      this.#devices = await navigator.mediaDevices.enumerateDevices();
      this.#preRecorder = new PreRecorder(
        FLIP_TIME,
        this.#mediaStream,
        recording => {
          if (RecordingState.isRecording(this.recording)) {
            this.dispatchEvent(
              new RecordingCompleteEvent(this.recording.startTime, recording)
            );
            this.recording = RecordingState.stopRecording(this.recording);
          }
        },
        { mimeType: AUDIO_CODEC },
        { type: AUDIO_CODEC }
      );
      await this.#audioContext.resume();
    } catch (error) {
      this.dispatchEvent(new ErrorEvent(error));
    }
  };

  /**
   * In environments that restrict audio contexts from running without user
   * input, this function should be called in a user input handler like onClick.
   *
   * Otherwise, it can be called in non-handler code.
   */
  start = async (source?: MediaStream) => {
    const stream = source ?? this.#mediaStream;
    if (!stream) {
      throw new Error(
        "No media stream available: pass a source or call requestMicrophonePermission() first"
      );
    }
    this.#vadControl = vad(this.#audioContext, stream, {
      onVoiceStart: this.#onVoiceStart,
      onVoiceStop: this.#onVoiceStop,
    });
    this.#vadControl.enable();
  };

  /**
   * Clean up all associated data
   */
  destroy = () => {
    this.#vadControl?.destroy();
    this.#preRecorder?.destroy();
    this.#audioContext.close();
  };

  #onVoiceStart = () => {
    this.#starts.push(this.#audioContext.currentTime);
    if (!this.#vadControl) {
      // We haven't got prerequisites set up and can't start a recording
      return;
    }
    if (RecordingState.isRecording(this.recording)) {
      // A recording is already running, we can't start a new one
      return;
    }
    this.#startRecording();
  };

  #onVoiceStop = () => {
    const stopTime = this.#audioContext.currentTime;
    this.#stops.push(stopTime);
    // Wait out the debounce period before committing the stop; a new voice
    // onset in the meantime cancels it.
    setTimeout(() => {
      const latestStart = this.#starts.at(-1);
      if (!latestStart) {
        // We've stopped before we've started
        return;
      }
      if (RecordingState.isStopped(this.recording)) {
        // We're not currently recording so we can't complete a recording
        return;
      }
      if (!this.#vadControl) {
        // We haven't set up our voice activity detector yet
        return;
      }
      if (latestStart >= stopTime) {
        // We've had a voice onset since the stop was triggered, so we shouldn't
        // stop the recording
        return;
      }
      this.#stopRecording();
    }, this.#debounceTime * 1000);
  };

  #startRecording = () => {
    if (!this.#mediaStream) {
      throw new Error("Media stream is gone, can no longer record");
    }
    if (RecordingState.isStopped(this.recording)) {
      // Freeze the pre-recorder's flip cycle so the pre-roll is retained.
      this.#preRecorder?.hold();
      const recordingStartEvent = new RecordingStartedEvent();
      this.recording = RecordingState.startRecording(
        this.recording,
        recordingStartEvent.startTime
      );
      this.dispatchEvent(recordingStartEvent);
    }
  };

  #stopRecording = () => {
    if (!this.#preRecorder) {
      throw new Error("There's no running recorder, can't stop a recording");
    }
    if (RecordingState.isRecording(this.recording)) {
      this.#preRecorder.release();
    }
  };
}
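
An end-to-end usage sketch, following the constructor's doc comment. The button and page wiring are assumptions about the host app; getUserMedia and AudioContext generally need a user gesture.

import { VoiceRecorder } from "./Recorder";

const recorder = new VoiceRecorder(1); // 1 s debounce after voice stops

// Listeners are typed via typedVoiceRecorderEventTarget:
recorder.addEventListener("recordingStarted", e => {
  console.log("voice detected at", e.startTime);
});
recorder.addEventListener("recordingComplete", e => {
  console.log(`captured ${e.recording.size} bytes in ${e.endTime - e.startTime} ms`);
});

// Hypothetical button on the host page:
document.querySelector("button")?.addEventListener("click", async () => {
  await recorder.requestMicrophonePermission();
  await recorder.start();
});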

RecordingState.ts
interface Recording {
  state: "recording";
  startTime: number;
}

interface NotRecording {
  state: "not-recording";
}

export type RecordingState = Recording | NotRecording;

export const RecordingState = {
  new: (): NotRecording => {
    return { state: "not-recording" };
  },
  startRecording: (
    recordingState: NotRecording,
    startTime?: number
  ): Recording => {
    if (!RecordingState.isStopped(recordingState)) {
      throw new Error("Could not start recording, recording already running");
    }
    return {
      state: "recording",
      startTime: startTime ?? Date.now(),
    };
  },
  stopRecording: (recordingState: Recording): NotRecording => {
    if (!RecordingState.isRecording(recordingState)) {
      throw new Error("Could not stop recording, recording not running");
    }
    return RecordingState.new();
  },
  isRecording: (
    recordingState: RecordingState
  ): recordingState is Recording => {
    return recordingState.state === "recording";
  },
  isStopped: (
    recordingState: RecordingState
  ): recordingState is NotRecording => {
    return recordingState.state === "not-recording";
  },
};
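
A quick sketch of the state transitions. In the gist VoiceRecorder drives these calls; the standalone flow below is illustrative, and the type guards narrow the union at compile time.

const initial = RecordingState.new();                        // NotRecording
const running = RecordingState.startRecording(initial, Date.now());
console.log(`started at ${running.startTime}`);              // Recording
const stopped = RecordingState.stopRecording(running);       // NotRecording again
console.log(RecordingState.isStopped(stopped));              // true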

voice-activity-detector.ts
// @ts-nocheck
// https://github.com/wjw12/voice-activity-detector/blob/main/src/voice-activity-detector.js
'use strict';

function clamp(value, min, max) {
  return min < max
    ? (value < min ? min : value > max ? max : value)
    : (value < max ? max : value > min ? min : value)
}

function frequencyToIndex(frequency, sampleRate, frequencyBinCount) {
  var nyquist = sampleRate / 2
  var index = Math.round(frequency / nyquist * frequencyBinCount)
  return clamp(index, 0, frequencyBinCount)
}

function fourierAverage(frequencies, minHz, maxHz, sampleRate, binCount) {
  var start = frequencyToIndex(minHz, sampleRate, binCount)
  var end = frequencyToIndex(maxHz, sampleRate, binCount)
  var count = end - start
  var sum = 0
  for (; start < end; start++) {
    sum += frequencies[start] / 255.0
  }
  return count === 0 ? 0 : (sum / count)
}

const vad = function(audioContext, stream, opts) {
  opts = opts || {};
  var defaults = {
    fftSize: 512,
    bufferLen: 512,
    smoothingTimeConstant: 0.8,
    minCaptureFreq: 85, // in Hz
    maxCaptureFreq: 1000, // in Hz
    noiseCaptureDuration: 1000, // in ms
    minNoiseLevel: 0.3, // from 0 to 1
    maxNoiseLevel: 0.7, // from 0 to 1
    avgNoiseMultiplier: 1.1,
    onVoiceStart: function() {},
    onVoiceStop: function() {},
    onUpdate: function(val) {}
  };

  var options = {};
  for (var key in defaults) {
    options[key] = opts.hasOwnProperty(key) ? opts[key] : defaults[key];
  }

  var baseLevel = 0;
  var voiceScale = 1;
  var activityCounter = 0;
  var activityCounterMin = 0;
  var activityCounterMax = 30;
  var activityCounterThresh = 5;

  var envFreqRange = [];
  var isNoiseCapturing = true;
  var prevVadState = undefined;
  var vadState = false;
  var captureTimeout = null;

  var source = audioContext.createMediaStreamSource(stream);
  var analyser = audioContext.createAnalyser();
  analyser.smoothingTimeConstant = options.smoothingTimeConstant;
  analyser.fftSize = options.fftSize;

  var frequencies = new Uint8Array(analyser.frequencyBinCount);

  connect();

  var raf = null;

  function processVAD() {
    analyser.getByteFrequencyData(frequencies);
    var average = fourierAverage(frequencies, options.minCaptureFreq, options.maxCaptureFreq, analyser.context.sampleRate, analyser.frequencyBinCount);

    if (isNoiseCapturing) {
      envFreqRange.push(average);
      raf = requestAnimationFrame(processVAD);
      return;
    }

    if (average >= baseLevel && activityCounter < activityCounterMax) {
      activityCounter++;
    } else if (average < baseLevel && activityCounter > activityCounterMin) {
      activityCounter--;
    }
    vadState = activityCounter > activityCounterThresh;

    if (prevVadState !== vadState) {
      vadState ? onVoiceStart() : onVoiceStop();
      prevVadState = vadState;
    }

    options.onUpdate(Math.max(0, average - baseLevel) / voiceScale);
    raf = requestAnimationFrame(processVAD);
  }

  if (isNoiseCapturing) {
    console.log('VAD: start noise capturing');
    captureTimeout = setTimeout(init, options.noiseCaptureDuration);
  }

  function init() {
    console.log('VAD: stop noise capturing');
    isNoiseCapturing = false;
    captureTimeout = null;

    envFreqRange = envFreqRange.filter(function(val) {
      return val;
    }).sort();
    // Despite the name, this takes the minimum observed level during noise
    // capture as the environment baseline.
    var averageEnvFreq = envFreqRange.length ? envFreqRange.reduce(function (p, c) { return Math.min(p, c) }, 1) : (options.minNoiseLevel || 0.1);

    baseLevel = averageEnvFreq * options.avgNoiseMultiplier;
    if (options.minNoiseLevel && baseLevel < options.minNoiseLevel) baseLevel = options.minNoiseLevel;
    if (options.maxNoiseLevel && baseLevel > options.maxNoiseLevel) baseLevel = options.maxNoiseLevel;
    voiceScale = 1 - baseLevel;

    console.log('VAD: base level:', baseLevel);
  }

  function connect() {
    source.connect(analyser);
  }

  function disconnect() {
    analyser.disconnect();
    source.disconnect();
  }

  function destroy() {
    captureTimeout && clearTimeout(captureTimeout);
    disconnect();
  }

  function enable() {
    !raf && processVAD();
  }

  function disable() {
    raf && cancelAnimationFrame(raf);
    captureTimeout && clearTimeout(captureTimeout);
    envFreqRange = [];
  }

  function onVoiceStart() {
    options.onVoiceStart();
  }

  function onVoiceStop() {
    options.onVoiceStop();
  }

  return {enable: enable, disable: disable, destroy: destroy};
};

export default vad;

VoiceRecorderEventTarget.ts
export class ErrorEvent extends Event {
  error: unknown;
  constructor(error: unknown) {
    super("error");
    this.error = error;
  }
}

export class RecordingStartedEvent extends Event {
  startTime: number;
  constructor() {
    super("recordingStarted");
    this.startTime = Date.now();
  }
}

export class RecordingCompleteEvent extends Event {
  recording: Blob;
  startTime: number;
  endTime: number;
  constructor(startTime: number, file: Blob) {
    super("recordingComplete");
    this.startTime = startTime;
    this.endTime = Date.now();
    this.recording = file;
  }
}

interface VoiceRecorderEvent {
  error: ErrorEvent;
  recordingStarted: RecordingStartedEvent;
  recordingComplete: RecordingCompleteEvent;
}

type VoiceRecorderEventHandler<K extends keyof VoiceRecorderEvent> = (
  event: VoiceRecorderEvent[K]
) => void;

interface VoiceRecorderEventTarget extends EventTarget {
  addEventListener<K extends keyof VoiceRecorderEvent>(
    type: K,
    callback: VoiceRecorderEventHandler<K> | null,
    options?: boolean | AddEventListenerOptions | undefined
  ): void;
  addEventListener(
    type: string,
    callback: EventListenerOrEventListenerObject | null,
    options?: EventListenerOptions | boolean
  ): void;
}

// https://dev.to/43081j/strongly-typed-event-emitters-using-eventtarget-in-typescript-3658
export const typedVoiceRecorderEventTarget = EventTarget as {
  new (): VoiceRecorderEventTarget;
};
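
The final export is a purely type-level trick from the linked dev.to post: at runtime `typedVoiceRecorderEventTarget` is just `EventTarget`; only the TypeScript-visible overloads change. A hedged sketch of how the same cast generalizes to any event map; `TypedTarget` and `typedEventTarget` are hypothetical names, not part of the gist:

// Generic version of the cast above, reusable for any event map.
interface TypedTarget<EventMap> extends EventTarget {
  addEventListener<K extends keyof EventMap & string>(
    type: K,
    callback: ((event: EventMap[K]) => void) | null,
    options?: boolean | AddEventListenerOptions
  ): void;
  addEventListener(
    type: string,
    callback: EventListenerOrEventListenerObject | null,
    options?: boolean | EventListenerOptions
  ): void;
}

function typedEventTarget<EventMap>() {
  // Type-level cast only; the runtime constructor is plain EventTarget.
  return EventTarget as { new (): TypedTarget<EventMap> };
}

// Usage: class Clock extends typedEventTarget<{ tick: Event }>() {}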