Last active
September 2, 2022 13:22
-
-
Save seatedro/71c41e5de26c2bf6b5244e695ef2e9d3 to your computer and use it in GitHub Desktop.
Transcription Server 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import speech, { SpeechClient } from '@google-cloud/speech'; | |
import { google } from '@google-cloud/speech/build/protos/protos'; | |
import * as pumpify from 'pumpify'; | |
import chalk from 'chalk'; | |
import { Socket } from 'socket.io'; | |
/**
 * Shared Speech-to-Text client. Created lazily on the first call to
 * `startRecognitionStream()` and reused by every later stream.
 */
let speechClient: SpeechClient | null = null;

/**
 * Bridges a socket.io connection to a Google Cloud Speech streaming
 * recognition session: opens the recognize stream, schedules a restart before
 * the streaming limit is reached, and forwards final transcripts to the
 * socket as `speechData` events.
 *
 * NOTE(review): `stopRecognitionStream()` and `restartStream()` are invoked
 * below but are not defined in this part of the file — confirm they are
 * provided elsewhere before relying on this class.
 */
class SpeechToTextUtils {
  /** Active recognize stream returned by the Speech client. */
  recognizeStream!: pumpify | null;
  /** End time (ms) of the most recently processed result. */
  resultEndTime = 0;
  /** `resultEndTime` (ms) captured at the most recent final result. */
  isFinalEndTime = 0;
  /** Not written in the visible code; presumably maintained by the restart logic — verify. */
  finalRequestEndTime = 0;
  /** Subtracted from result end times when computing the corrected timestamp. */
  bridgingOffset = 0;
  /** Milliseconds a stream is kept open before `restartStream` is scheduled. */
  streamingLimit = 290000;
  /** Multiplied by `streamingLimit` when computing the corrected timestamp. */
  restartCounter = 0;
  /** Whether the last processed result was a final one. */
  lastTranscriptWasFinal = false;
  /** Audio chunks buffered for the current stream; emptied on every start. */
  audioInput: DataView[] = [];
  /** Not written in the visible code; presumably the previous stream's chunks — verify. */
  lastAudioInput: DataView[] = [];
  /** Not read in the visible code; presumably consumed by the audio-receiving path — verify. */
  newStream = true;
  /** Socket that receives `speechData` and `googleCloudStreamError` events. */
  socket!: Socket;
  /** Streaming request passed to `streamingRecognize`. */
  request!: google.cloud.speech.v1.IStreamingRecognitionConfig | undefined;
  /** Handle of the pending restart timer, if one is scheduled. */
  restartTimeout: NodeJS.Timeout | undefined;

  /** Sets the socket that recognition events are emitted on. */
  set _socket(value: Socket) {
    this.socket = value;
  }

  /** Sets the streaming request used for subsequently started streams. */
  set _request(value: google.cloud.speech.v1.IStreamingRecognitionConfig) {
    this.request = value;
  }

  /**
   * Opens a new streaming recognition request and schedules `restartStream`
   * to run after `streamingLimit` milliseconds.
   *
   * Stream errors are logged, forwarded to the socket as
   * `googleCloudStreamError`, and followed by `stopRecognitionStream()`.
   */
  startRecognitionStream() {
    this.audioInput = [];

    if (!speechClient) {
      speechClient = new speech.SpeechClient(); // Creates a client
    }

    this.recognizeStream = speechClient
      .streamingRecognize(this.request)
      .on('error', (err) => {
        console.error('Error when processing audio: ' + err);
        this.socket.emit('googleCloudStreamError', err);
        this.stopRecognitionStream();
      })
      .on('data', this.speechCallback.bind(this));

    // Drop any timer left over from an earlier start so that calling this
    // method twice cannot leave two restarts pending.
    clearTimeout(this.restartTimeout);
    this.restartTimeout = setTimeout(
      this.restartStream.bind(this),
      this.streamingLimit
    );
  }

  /**
   * Handles one streaming response: records the end time of its first result,
   * echoes the transcript to the console, and emits final transcripts to the
   * socket as `speechData`.
   *
   * Responses whose first result lacks `resultEndTime` or `alternatives` are
   * ignored.
   *
   * @param stream - Response delivered by the recognize stream's `data` event.
   */
  speechCallback(stream: google.cloud.speech.v1.StreamingRecognizeResponse) {
    const result = stream.results?.[0];
    const endTime = result?.resultEndTime;
    const alternatives = result?.alternatives;
    // Only presence is checked here. Zero-valued `seconds`/`nanos` and
    // non-final results are valid and must not be filtered out.
    if (!result || !endTime || !alternatives) {
      return;
    }

    // `seconds` is a 64-bit field and may arrive as a number, a decimal
    // string, or a Long-like object; an absent value is treated as 0.
    const rawSeconds = endTime.seconds ?? 0;
    let seconds: number;
    if (typeof rawSeconds === 'number') {
      seconds = rawSeconds;
    } else if (typeof rawSeconds === 'string') {
      seconds = parseInt(rawSeconds, 10);
    } else {
      seconds = rawSeconds.toNumber();
    }

    // Convert API result end time from seconds + nanoseconds to milliseconds
    this.resultEndTime =
      seconds * 1000 + Math.round((endTime.nanos ?? 0) / 1000000);

    // Calculate correct time based on offset from audio sent twice
    const correctedTime =
      this.resultEndTime -
      this.bridgingOffset +
      this.streamingLimit * this.restartCounter;

    // clearLine/cursorTo exist only when stdout is a terminal; calling them
    // on a piped or redirected stdout would throw.
    const interactive = process.stdout.isTTY;
    if (interactive) {
      process.stdout.clearLine(0);
      process.stdout.cursorTo(0);
    }

    const firstAlternative = alternatives[0];
    let stdoutText = '';
    if (firstAlternative) {
      stdoutText = correctedTime + ': ' + firstAlternative.transcript;
    }

    if (result.isFinal) {
      process.stdout.write(chalk.green(`${stdoutText}\n`));
      // A final result without alternatives carries nothing to send.
      if (firstAlternative) {
        this.socket.emit('speechData', firstAlternative.transcript);
      }
      this.isFinalEndTime = this.resultEndTime;
      this.lastTranscriptWasFinal = true;
    } else {
      // The interim preview is rewritten in place, which only works on a
      // terminal; elsewhere it would pile up as one unbroken line.
      if (interactive) {
        // Make sure transcript does not exceed console character length
        if (stdoutText.length > process.stdout.columns) {
          stdoutText =
            stdoutText.substring(0, process.stdout.columns - 4) + '...';
        }
        process.stdout.write(chalk.red(`${stdoutText}`));
      }
      this.lastTranscriptWasFinal = false;
    }
  }
}

export default new SpeechToTextUtils();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment