Last active
February 20, 2024 05:33
-
-
Save KristofferEriksson/d9dba72519c3caaf9de8d4774850b929 to your computer and use it in GitHub Desktop.
An experimental React hook for real-time speech-to-text using the Web Speech API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { useCallback, useEffect, useState } from "react"; | |
// Define custom types for SpeechRecognition and SpeechRecognitionEvent | |
// Minimal typing for the results payload the Web Speech API delivers to
// `onresult`; lib.dom provides SpeechRecognitionResultList but not the
// recognition interfaces themselves in all TS configurations.
interface ISpeechRecognitionEvent extends Event {
  // All results for the current session (finals and interim hypotheses).
  results: SpeechRecognitionResultList;
  // Index of the first entry in `results` that changed in this event.
  resultIndex: number;
}
// Minimal typing for the browser's SpeechRecognition object: the
// configuration knobs, start/stop controls, and event handlers this hook uses.
interface ISpeechRecognition extends EventTarget {
  // BCP 47 language tag for recognition (e.g. "en-US").
  lang: string;
  // When true, recognition keeps going after each final result.
  continuous: boolean;
  // When true, non-final (hypothesis) results are also delivered.
  interimResults: boolean;
  // Maximum number of alternative transcripts per result.
  maxAlternatives: number;
  start: () => void;
  stop: () => void;
  onresult: (event: ISpeechRecognitionEvent) => void;
  // NOTE(review): typed as plain Event; the real event also carries an
  // `error` code string — confirm against SpeechRecognitionErrorEvent.
  onerror: (event: Event) => void;
  // Fired when the recognition session ends (for any reason).
  onend: () => void;
}
// Expose the recognition constructor on `window`. Chromium-based browsers
// ship it under the `webkit` prefix, so both spellings are declared.
declare global {
  interface Window {
    SpeechRecognition: new () => ISpeechRecognition;
    webkitSpeechRecognition: new () => ISpeechRecognition;
  }
}
/** Options accepted by {@link useSpeechToText}. All fields are optional. */
interface UseSpeechToTextProps {
  /** Recognition language as a BCP 47 tag. Defaults to "en-US". */
  lang?: string;
  /** Keep listening after each final result. Defaults to true. */
  continuous?: boolean;
  /** Deliver interim (non-final) hypotheses as well. Defaults to true. */
  interimResults?: boolean;
  /** Maximum alternatives per result. Defaults to 1. */
  maxAlternatives?: number;
  /** Called with the latest combined transcript after every result event. */
  onResult?: (result: string) => void;
  /** Called with an error description when recognition fails or is unsupported. */
  onError?: (error: string) => void;
}
export const useSpeechToText = ({ | |
lang = "en-US", | |
continuous = true, | |
interimResults = true, | |
maxAlternatives = 1, | |
onResult, | |
onError, | |
}: UseSpeechToTextProps = {}) => { | |
const [isListening, setIsListening] = useState(false); | |
const [transcript, setTranscript] = useState(""); | |
const [lastProcessedIndex, setLastProcessedIndex] = useState(0); | |
const recognition: ISpeechRecognition | null = | |
typeof window !== "undefined" && | |
(window.SpeechRecognition || window.webkitSpeechRecognition) | |
? new (window.SpeechRecognition || window.webkitSpeechRecognition)() | |
: null; | |
const handleResult = useCallback( | |
(event: ISpeechRecognitionEvent) => { | |
let interimTranscript = ""; | |
let finalTranscript = ""; | |
// Iterate through all the current results | |
for (let i = lastProcessedIndex; i < event.results.length; i++) { | |
const result = event.results[i]; | |
// If the result is final, append to the final transcript | |
if (result.isFinal) { | |
finalTranscript += result[0].transcript + " "; | |
setLastProcessedIndex(i + 1); | |
} else { | |
// Otherwise, append to the interim transcript | |
interimTranscript += result[0].transcript + " "; | |
} | |
} | |
// Update the transcript state with a combination of the final and interim results | |
setTranscript(transcript + finalTranscript + interimTranscript); | |
// Invoke callback with the latest transcript | |
onResult && onResult(transcript + finalTranscript + interimTranscript); | |
}, | |
[onResult, transcript, lastProcessedIndex], | |
); | |
// start and stop functions using useCallback | |
const start = useCallback(() => { | |
if (!recognition || isListening) return; | |
setTranscript(""); | |
setLastProcessedIndex(0); | |
setIsListening(true); | |
recognition.start(); | |
}, [recognition, isListening]); | |
const stop = useCallback(() => { | |
if (!recognition || !isListening) return; | |
recognition.stop(); | |
setIsListening(false); | |
}, [recognition, isListening]); | |
useEffect(() => { | |
if (!recognition) { | |
onError && | |
onError("Speech recognition is not supported in this browser."); | |
return; | |
} | |
recognition.lang = lang; | |
recognition.continuous = continuous; | |
recognition.interimResults = interimResults; | |
recognition.maxAlternatives = maxAlternatives; | |
recognition.onresult = handleResult; | |
recognition.onerror = (event) => onError && onError(event.type); | |
recognition.onend = () => { | |
setIsListening(false); | |
}; | |
return () => { | |
if (isListening) recognition.stop(); | |
}; | |
}, [ | |
lang, | |
continuous, | |
interimResults, | |
maxAlternatives, | |
handleResult, | |
onError, | |
recognition, | |
start, | |
stop, | |
isListening, | |
]); | |
return { start, stop, transcript, isListening }; | |
}; | |
export default useSpeechToText; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment