Skip to content

Instantly share code, notes, and snippets.

@KristofferEriksson
Last active February 20, 2024 05:33
Show Gist options
  • Save KristofferEriksson/d9dba72519c3caaf9de8d4774850b929 to your computer and use it in GitHub Desktop.
Save KristofferEriksson/d9dba72519c3caaf9de8d4774850b929 to your computer and use it in GitHub Desktop.
An experimental React hook for real-time speech-to-text using the Web Speech API
import { useCallback, useEffect, useState } from "react";
// Define custom types for SpeechRecognition and SpeechRecognitionEvent
interface ISpeechRecognitionEvent extends Event {
results: SpeechRecognitionResultList;
resultIndex: number;
}
interface ISpeechRecognition extends EventTarget {
lang: string;
continuous: boolean;
interimResults: boolean;
maxAlternatives: number;
start: () => void;
stop: () => void;
onresult: (event: ISpeechRecognitionEvent) => void;
onerror: (event: Event) => void;
onend: () => void;
}
declare global {
interface Window {
SpeechRecognition: new () => ISpeechRecognition;
webkitSpeechRecognition: new () => ISpeechRecognition;
}
}
interface UseSpeechToTextProps {
lang?: string;
continuous?: boolean;
interimResults?: boolean;
maxAlternatives?: number;
onResult?: (result: string) => void;
onError?: (error: string) => void;
}
export const useSpeechToText = ({
lang = "en-US",
continuous = true,
interimResults = true,
maxAlternatives = 1,
onResult,
onError,
}: UseSpeechToTextProps = {}) => {
const [isListening, setIsListening] = useState(false);
const [transcript, setTranscript] = useState("");
const [lastProcessedIndex, setLastProcessedIndex] = useState(0);
const recognition: ISpeechRecognition | null =
typeof window !== "undefined" &&
(window.SpeechRecognition || window.webkitSpeechRecognition)
? new (window.SpeechRecognition || window.webkitSpeechRecognition)()
: null;
const handleResult = useCallback(
(event: ISpeechRecognitionEvent) => {
let interimTranscript = "";
let finalTranscript = "";
// Iterate through all the current results
for (let i = lastProcessedIndex; i < event.results.length; i++) {
const result = event.results[i];
// If the result is final, append to the final transcript
if (result.isFinal) {
finalTranscript += result[0].transcript + " ";
setLastProcessedIndex(i + 1);
} else {
// Otherwise, append to the interim transcript
interimTranscript += result[0].transcript + " ";
}
}
// Update the transcript state with a combination of the final and interim results
setTranscript(transcript + finalTranscript + interimTranscript);
// Invoke callback with the latest transcript
onResult && onResult(transcript + finalTranscript + interimTranscript);
},
[onResult, transcript, lastProcessedIndex],
);
// start and stop functions using useCallback
const start = useCallback(() => {
if (!recognition || isListening) return;
setTranscript("");
setLastProcessedIndex(0);
setIsListening(true);
recognition.start();
}, [recognition, isListening]);
const stop = useCallback(() => {
if (!recognition || !isListening) return;
recognition.stop();
setIsListening(false);
}, [recognition, isListening]);
useEffect(() => {
if (!recognition) {
onError &&
onError("Speech recognition is not supported in this browser.");
return;
}
recognition.lang = lang;
recognition.continuous = continuous;
recognition.interimResults = interimResults;
recognition.maxAlternatives = maxAlternatives;
recognition.onresult = handleResult;
recognition.onerror = (event) => onError && onError(event.type);
recognition.onend = () => {
setIsListening(false);
};
return () => {
if (isListening) recognition.stop();
};
}, [
lang,
continuous,
interimResults,
maxAlternatives,
handleResult,
onError,
recognition,
start,
stop,
isListening,
]);
return { start, stop, transcript, isListening };
};
export default useSpeechToText;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment