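// Live speech-to-text with SFSpeechRecognizer and AVAudioEngine in SwiftUI.
// Note: a real app must declare NSSpeechRecognitionUsageDescription and
// NSMicrophoneUsageDescription in Info.plist; iOS terminates the app when
// authorization or microphone access is requested without them.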
import SwiftUI
import Speech
import AVFoundation
import Observation
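
// Observable model that owns the audio engine and the recognition task,
// publishing the running transcript and a transcribing flag to SwiftUI.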
@Observable
class SpeechRecognizer {
    var transcription: String = ""
    var isTranscribing: Bool = false

    private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private let audioEngine = AVAudioEngine()
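
    // Asks for speech-recognition authorization and, if granted,
    // starts recording and transcribing on the main queue.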
    func startTranscribing() {
        SFSpeechRecognizer.requestAuthorization { authStatus in
            switch authStatus {
            case .authorized:
                // The authorization callback can arrive on an arbitrary queue;
                // hop to main before mutating observed state or touching audio.
                DispatchQueue.main.async {
                    self.isTranscribing = true
                    self.startRecording()
                }
            case .denied, .restricted, .notDetermined:
                print("Speech recognition not authorized")
            @unknown default:
                fatalError("Unknown authorization status")
            }
        }
    }
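
    // Configures the audio session, taps the microphone, and streams
    // buffers into a live recognition request.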
    private func startRecording() {
        // Cancel any in-flight task before starting a new one.
        recognitionTask?.cancel()
        recognitionTask = nil

        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
            try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        } catch {
            print("Failed to set up audio session: \(error)")
            return
        }
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else {
            print("Unable to create a recognition request")
            return
        }
        recognitionRequest.shouldReportPartialResults = true

        let inputNode = audioEngine.inputNode
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        // Remove any stale tap first; installing a second tap on the
        // same bus crashes the audio engine.
        inputNode.removeTap(onBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
            self.recognitionRequest?.append(buffer)
        }
        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            print("Audio engine couldn't start: \(error)")
            return
        }
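
        // Partial results arrive repeatedly; each callback carries the best
        // transcription so far for the whole utterance.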
        recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest) { result, error in
            if let result = result {
                DispatchQueue.main.async {
                    self.transcription = result.bestTranscription.formattedString
                }
            }
            // Stop on error or once the recognizer marks the result final.
            if error != nil || result?.isFinal == true {
                self.stopTranscribing()
            }
        }
    }
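
    // Tears down the engine, finalizes the request, and resets UI state.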
    func stopTranscribing() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        recognitionRequest?.endAudio()
        recognitionTask?.cancel()
        recognitionRequest = nil
        recognitionTask = nil
        DispatchQueue.main.async {
            self.isTranscribing = false
        }
    }
}
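
// Minimal UI: a transcript label plus start/stop buttons whose enabled
// state mirrors isTranscribing.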
struct ContentView: View {
    @State private var speechRecognizer = SpeechRecognizer()

    var body: some View {
        VStack {
            Text(speechRecognizer.transcription)
                .padding()
            HStack {
                Button(action: {
                    speechRecognizer.startTranscribing()
                }) {
                    Text("Start Transcribing")
                        .padding()
                        .background(speechRecognizer.isTranscribing ? Color.gray : Color.blue)
                        .foregroundColor(.white)
                        .cornerRadius(10)
                }
                .disabled(speechRecognizer.isTranscribing)

                Button(action: {
                    speechRecognizer.stopTranscribing()
                }) {
                    Text("Stop Transcribing")
                        .padding()
                        .background(speechRecognizer.isTranscribing ? Color.red : Color.gray)
                        .foregroundColor(.white)
                        .cornerRadius(10)
                }
                .disabled(!speechRecognizer.isTranscribing)
            }
        }
        .padding()
    }
}
struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}