Transcription Server
import speech, { SpeechClient } from '@google-cloud/speech';
import { google } from '@google-cloud/speech/build/protos/protos';
import * as pumpify from 'pumpify';
import Long from 'long'; // protobuf int64 fields may arrive as Long values
import chalk from 'chalk';
import { Socket } from 'socket.io';

// Lazily-created client, shared across stream restarts.
let speechClient: SpeechClient | null = null;
class SpeechToTextUtils {
  recognizeStream!: pumpify | null;
  resultEndTime = 0;
  isFinalEndTime = 0;
  finalRequestEndTime = 0;
  bridgingOffset = 0;
  // Google ends a streaming request after ~5 minutes; restart just before that.
  streamingLimit = 290000; // ms
  restartCounter = 0;
  lastTranscriptWasFinal = false;
  audioInput: DataView[] = [];
  lastAudioInput: DataView[] = [];
  newStream = true;
  socket!: Socket;
  request!: google.cloud.speech.v1.IStreamingRecognitionConfig | undefined;
  restartTimeout: NodeJS.Timeout | undefined;

  set _socket(value: Socket) {
    this.socket = value;
  }

  set _request(value: google.cloud.speech.v1.IStreamingRecognitionConfig) {
    this.request = value;
  }
  startRecognitionStream() {
    this.audioInput = [];
    if (!speechClient) {
      speechClient = new speech.SpeechClient(); // Creates a client
    }
    this.recognizeStream = speechClient
      .streamingRecognize(this.request)
      .on('error', (err) => {
        console.error('Error while processing audio: ' + err);
        this.socket.emit('googleCloudStreamError', err);
        this.stopRecognitionStream();
      })
      .on('data', this.speechCallback.bind(this));

    // Schedule a restart before Google's streaming limit is reached.
    this.restartTimeout = setTimeout(
      this.restartStream.bind(this),
      this.streamingLimit
    );
  }
  speechCallback(stream: google.cloud.speech.v1.StreamingRecognizeResponse) {
    // Guard against empty responses. Use explicit null checks so a legitimate
    // value of 0 for seconds/nanos is not rejected, and do NOT require
    // isFinal here, or the interim-result branch below could never run.
    if (
      stream.results &&
      stream.results[0] &&
      stream.results[0].resultEndTime &&
      stream.results[0].resultEndTime.nanos != null &&
      stream.results[0].resultEndTime.seconds != null &&
      stream.results[0].alternatives
    ) {
      // Convert the API's result end time from seconds + nanoseconds to
      // milliseconds. The protobuf int64 `seconds` field may be delivered
      // as a string, a Long, or a plain number depending on the runtime.
      let seconds: number;
      if (typeof stream.results[0].resultEndTime.seconds === 'string')
        seconds = parseInt(stream.results[0].resultEndTime.seconds, 10);
      else if (Long.isLong(stream.results[0].resultEndTime.seconds))
        seconds = stream.results[0].resultEndTime.seconds.toNumber();
      else seconds = stream.results[0].resultEndTime.seconds;
      this.resultEndTime =
        seconds * 1000 +
        Math.round(stream.results[0].resultEndTime.nanos / 1000000);

      // Calculate the correct time based on the offset from audio sent twice
      // across a stream restart.
      const correctedTime =
        this.resultEndTime -
        this.bridgingOffset +
        this.streamingLimit * this.restartCounter;

      process.stdout.clearLine(0);
      process.stdout.cursorTo(0);
      let stdoutText = '';
      if (stream.results[0] && stream.results[0].alternatives[0]) {
        stdoutText =
          correctedTime + ': ' + stream.results[0].alternatives[0].transcript;
      }

      if (stream.results[0].isFinal) {
        process.stdout.write(chalk.green(`${stdoutText}\n`));
        this.socket.emit(
          'speechData',
          stream.results[0].alternatives[0].transcript
        );
        this.isFinalEndTime = this.resultEndTime;
        this.lastTranscriptWasFinal = true;
      } else {
        // Make sure the transcript does not exceed the console width.
        if (stdoutText.length > process.stdout.columns) {
          stdoutText =
            stdoutText.substring(0, process.stdout.columns - 4) + '...';
        }
        process.stdout.write(chalk.red(`${stdoutText}`));
        this.lastTranscriptWasFinal = false;
      }
    }
  }
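
  // NOTE: this gist calls stopRecognitionStream() and restartStream() but
  // never defines them. The two methods below are a reconstruction sketched
  // from Google's public "infinite streaming" sample and this class's own
  // fields; treat them as an assumption, not the author's exact code.
  stopRecognitionStream() {
    if (this.restartTimeout) {
      clearTimeout(this.restartTimeout); // cancel any pending scheduled restart
    }
    if (this.recognizeStream) {
      this.recognizeStream.end();
    }
    this.recognizeStream = null;
  }

  restartStream() {
    this.stopRecognitionStream();
    if (this.resultEndTime > 0) {
      this.finalRequestEndTime = this.isFinalEndTime;
    }
    this.resultEndTime = 0;
    // Keep the most recent audio so it can be "bridged" into the new stream,
    // avoiding words dropped at the restart boundary.
    this.lastAudioInput = this.audioInput;
    this.restartCounter++;
    if (!this.lastTranscriptWasFinal) {
      process.stdout.write('\n');
    }
    process.stdout.write(
      chalk.yellow(
        `${this.streamingLimit * this.restartCounter}: RESTARTING REQUEST\n`
      )
    );
    this.newStream = true;
    this.startRecognitionStream();
  }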
}
export default new SpeechToTextUtils();
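
// Hypothetical usage sketch (not part of the gist): one way to drive this
// singleton from a socket.io server. The inbound event names
// ('startGoogleCloudStream', 'binaryData', 'endGoogleCloudStream') are
// assumptions; only 'speechData' and 'googleCloudStreamError' appear above.
//
// import { Server } from 'socket.io';
// import speechToText from './SpeechToTextUtils';
//
// const io = new Server(8080);
// io.on('connection', (socket) => {
//   speechToText._socket = socket;
//   speechToText._request = {
//     config: {
//       encoding: 'LINEAR16',
//       sampleRateHertz: 16000,
//       languageCode: 'en-US',
//     },
//     interimResults: true, // needed for the red interim console output
//   };
//   socket.on('startGoogleCloudStream', () => speechToText.startRecognitionStream());
//   socket.on('binaryData', (chunk) => speechToText.recognizeStream?.write(chunk));
//   socket.on('endGoogleCloudStream', () => speechToText.stopRecognitionStream());
// });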