lmcarreiro · July 29, 2021 21:25
diff --git a/useSpeechToText.diff b/useSpeechToText.diff
 + const BUFFER_SECONDS = 2;

  export default function useSpeechToText(
    speechToTextEnabled: boolean,
    muted: boolean,
 
   newMessage: (message: { text: string; isFinal: boolean }) => void,
  ) {

 +   const bufferBlocks = React.useRef<{ duration: number; bytes: ArrayBufferLike }[]>([]);


    // ...


 -   // Control the streaming flag, based on the voice activity detection (that uses hark) and the mute/unmute flag
 -   React.useEffect(() => {
 -     if (shouldStream) {
 -       console.log("Voice activity detected, starting streaming current buffer + live streaming...");
 -       streamingFlagRef.current = true;
 -     } else {
 -       console.log("No voice activity detected, stopped streaming.");
 -       const timeout = setTimeout(() => {
 -         streamingFlagRef.current = false;
 -       }, 2_000);
 -
 -       return () => clearInterval(timeout);
 -     }
 -   }, [shouldStream]);
 +   // Control the streaming flag, based on the voice activity detection (that uses hark) and the mute/unmute flag
 +   React.useEffect(() => {
 +     if (shouldStream) {
 +       if (!streamingFlagRef.current) {
 +         console.log("Voice activity detected, starting streaming current buffer + live streaming...");
 +         streamingFlagRef.current = true;
 +       }
 +     } else {
 +       if (!streamingFlagRef.current) return;
 +
 +       console.log("Stop detecting voice activity, will stop streaming in 2 seconds...");
 +
 +       const stopStreamingTimer = setTimeout(() => {
 +         console.log("Stopped streaming after 2 seconds without voice activity.");
 +         streamingFlagRef.current = false;
 +       }, 2_000);
 +
 +       return () => {
 +         if (streamingFlagRef.current) {
 +           console.log("Voice activity detected, continue streaming...");
 +         }
 +
 +         clearTimeout(stopStreamingTimer);
 +       };
 +     }
 +   }, [shouldStream]);


    // ...


        const onAudioProcess = (ev: AudioProcessingEvent) => {
          const block = {
            duration: ev.inputBuffer.duration,
            bytes: convertFloat32ToInt16(ev.inputBuffer.getChannelData(0)),
          };

 -         if (streamingFlagRef.current) {
 -           pushStream.write(block.bytes);
 -         }
 +         // If not streaming, keep the current audio in the buffer so it can be sent when streaming starts
 +         if (!streamingFlagRef.current) {
 +           const totalDuration = bufferBlocks.current.reduce((sum, curr) => sum + curr.duration, 0);
 +
 +           if (totalDuration >= BUFFER_SECONDS) {
 +             bufferBlocks.current.shift();
 +           }
 +
 +           bufferBlocks.current.push(block);
 +         }
 +         // If streaming, first send any data held in the buffer, then send the current live audio block
 +         else {
 +           while (bufferBlocks.current.length) {
 +             pushStream.write(bufferBlocks.current.shift()!.bytes);
 +           }
 +
 +           pushStream.write(block.bytes);
 +         }
        };

        // ...

        return () => {
          console.log("############## stop()");

          recognizer.stopContinuousRecognitionAsync();
          running.current = false;
 +         bufferBlocks.current = [];

          processor.removeEventListener("audioprocess", onAudioProcess);
          processor.disconnect(output);
          input.disconnect(processor);
          context.close();
        };


      // ...
	+ const BUFFER_SECONDS = 2;

	export default function useSpeechToText(
	speechToTextEnabled: boolean,
	muted: boolean,

	newMessage: (message: { text: string; isFinal: boolean }) => void,
	) {

	+ const bufferBlocks = React.useRef<{ duration: number; bytes: ArrayBufferLike }[]>([]);


	// ...


	- // Control the streaming flag, based on the voice activity detection (that uses hark) and the mute/unmute flag
	- React.useEffect(() => {
	- if (shouldStream) {
	- console.log("Voice activity detected, starting streaming current buffer + live streaming...");
	- streamingFlagRef.current = true;
	- } else {
	- console.log("No voice activity detected, stopped streaming.");
	- const timeout = setTimeout(() => {
	- streamingFlagRef.current = false;
	- }, 2_000);
	-
	- return () => clearInterval(timeout);
	- }
	- }, [shouldStream]);
	+ // Control the streaming flag, based on the voice activity detection (that uses hark) and the mute/unmute flag
	+ React.useEffect(() => {
	+ if (shouldStream) {
	+ if (!streamingFlagRef.current) {
	+ console.log("Voice activity detected, starting streaming current buffer + live streaming...");
	+ streamingFlagRef.current = true;
	+ }
	+ } else {
	+ if (!streamingFlagRef.current) return;
	+
	+ console.log("Stop detecting voice activity, will stop streaming in 2 seconds...");
	+
	+ const stopStreamingTimer = setTimeout(() => {
	+ console.log("Stopped streaming after 2 seconds without voice activity.");
	+ streamingFlagRef.current = false;
	+ }, 2_000);
	+
	+ return () => {
	+ if (streamingFlagRef.current) {
	+ console.log("Voice activity detected, continue streaming...");
	+ }
	+
	+ clearTimeout(stopStreamingTimer);
	+ };
	+ }
	+ }, [shouldStream]);


	// ...


	const onAudioProcess = (ev: AudioProcessingEvent) => {
	const block = {
	duration: ev.inputBuffer.duration,
	bytes: convertFloat32ToInt16(ev.inputBuffer.getChannelData(0)),
	};

	- if (streamingFlagRef.current) {
	- pushStream.write(block.bytes);
	- }
	+ // If not streaming, keep the current audio in the buffer so it can be sent when streaming starts
	+ if (!streamingFlagRef.current) {
	+ const totalDuration = bufferBlocks.current.reduce((sum, curr) => sum + curr.duration, 0);
	+
	+ if (totalDuration >= BUFFER_SECONDS) {
	+ bufferBlocks.current.shift();
	+ }
	+
	+ bufferBlocks.current.push(block);
	+ }
	+ // If streaming, first send any data held in the buffer, then send the current live audio block
	+ else {
	+ while (bufferBlocks.current.length) {
	+ pushStream.write(bufferBlocks.current.shift()!.bytes);
	+ }
	+
	+ pushStream.write(block.bytes);
	+ }
	};

	// ...

	return () => {
	console.log("############## stop()");

	recognizer.stopContinuousRecognitionAsync();
	running.current = false;
	+ bufferBlocks.current = [];

	processor.removeEventListener("audioprocess", onAudioProcess);
	processor.disconnect(output);
	input.disconnect(processor);
	context.close();
	};


	// ...