|
import SwiftUI
import AVFoundation
|
struct ContentView: View {
    @State private var isRecording = false
    @State private var transcription = ""
    @State private var recordedAudioURL: URL?
    @State private var responseAudioURL: URL?
    @State private var isConnected = false
    @State private var accumulatedAudioData = Data()
    @State private var webSocketTask: URLSessionWebSocketTask?

    private let audioEngine = AVAudioEngine()
|
    var body: some View {
        VStack {
            Image(systemName: isRecording ? "mic.fill" : "mic")
                .imageScale(.large)
                .foregroundStyle(.tint)
            Text(isRecording ? "Recording..." : "Start Recording")
            Button(action: toggleRecording) {
                Text(isRecording ? "Stop" : "Start")
                    .padding()
                    .background(isRecording ? Color.red : Color.blue)
                    .foregroundColor(.white)
                    .cornerRadius(10)
            }
            Button(action: commitAndCreateResponse) {
                Text("Send")
                    .padding()
                    .background(Color.green)
                    .foregroundColor(.white)
                    .cornerRadius(10)
            }
            Text(transcription)
                .padding()
            Button(action: playRecordedAudio) {
                Text("Play")
                    .padding()
                    .background(Color.orange)
                    .foregroundColor(.white)
                    .cornerRadius(10)
            }
            .disabled(recordedAudioURL == nil)
            Button(action: playResponseAudio) {
                Text("Play Response")
                    .padding()
                    .background(Color.purple)
                    .foregroundColor(.white)
                    .cornerRadius(10)
            }
            .disabled(responseAudioURL == nil)
        }
        .padding()
        .onAppear {
            setupAudioSession()
            setupWebSocket()
        }
    }
|
    private func setupAudioSession() {
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .mixWithOthers])
            try audioSession.setActive(true)
        } catch {
            print("Audio session error: \(error)")
        }
    }
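
    // A minimal sketch of requesting microphone access explicitly instead of relying on
    // the system prompt that appears on first input use. requestMicrophonePermission is
    // an added helper and an assumption, not part of the original flow; recording also
    // requires an NSMicrophoneUsageDescription entry in Info.plist.
    private func requestMicrophonePermission() {
        AVAudioSession.sharedInstance().requestRecordPermission { granted in
            if !granted {
                print("Microphone permission denied; recording will capture silence.")
            }
        }
    }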
|
|
|
    private func setupWebSocket() {
        guard let url = URL(string: "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01") else { return }
        var request = URLRequest(url: url)
        // Look for the API key in Info.plist first, then fall back to the process environment.
        if let apiKey = (Bundle.main.object(forInfoDictionaryKey: "OPENAI_API_KEY") as? String)
            ?? ProcessInfo.processInfo.environment["OPENAI_API_KEY"] {
            request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        } else {
            print("Error: OPENAI_API_KEY not found in Info.plist or environment variables")
            return
        }
        request.setValue("realtime=v1", forHTTPHeaderField: "OpenAI-Beta")

        // Tear down any previous connection before opening a new one.
        webSocketTask?.cancel(with: .goingAway, reason: nil)
        webSocketTask = URLSession.shared.webSocketTask(with: request)
        webSocketTask?.resume()
        receiveMessage()
        // URLSessionWebSocketTask offers no connect callback without a delegate,
        // so the connection is treated as open once the task is resumed.
        isConnected = true
    }
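
    // A minimal sketch of configuring the Realtime session right after connecting so the
    // server expects 24 kHz mono PCM16 audio. sendSessionUpdate is an added helper; the
    // exact "session.update" fields below follow the beta Realtime API docs and should be
    // verified against the current reference.
    private func sendSessionUpdate() {
        let event: [String: Any] = [
            "type": "session.update",
            "session": [
                "modalities": ["audio", "text"],
                "input_audio_format": "pcm16",
                "output_audio_format": "pcm16"
            ]
        ]
        if let jsonData = try? JSONSerialization.data(withJSONObject: event),
           let jsonString = String(data: jsonData, encoding: .utf8) {
            webSocketTask?.send(.string(jsonString)) { error in
                if let error = error {
                    print("Error sending session.update: \(error)")
                }
            }
        }
    }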
|
|
|
    private func receiveMessage() {
        webSocketTask?.receive { result in
            switch result {
            case .success(let message):
                switch message {
                case .string(let text):
                    self.handleReceivedText(text)
                case .data:
                    print("Received data message")
                @unknown default:
                    break
                }
                // receive(completionHandler:) delivers a single message, so re-arm it here.
                self.receiveMessage()
            case .failure(let error):
                print("WebSocket error: \(error)")
                DispatchQueue.main.async {
                    self.isConnected = false
                }
            }
        }
    }
|
    private func handleReceivedText(_ text: String) {
        guard let data = text.data(using: .utf8),
              let json = try? JSONSerialization.jsonObject(with: data, options: []) as? [String: Any] else { return }

        // Transcript events carry an "item" whose first content part holds the text.
        if let item = json["item"] as? [String: Any],
           let content = item["content"] as? [[String: Any]],
           let firstContent = content.first,
           let transcript = firstContent["transcript"] as? String {
            DispatchQueue.main.async {
                self.transcription = transcript
            }
        } else if let type = json["type"] as? String {
            switch type {
            case "response.audio.delta":
                // Response audio arrives as Base64-encoded PCM16 chunks.
                if let delta = json["delta"] as? String {
                    handleAudioDelta(delta)
                }
            case "response.audio.done":
                convertAccumulatedAudioToFile()
            default:
                print("handleReceivedText:other_types:\(type)")
            }
        }
    }
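
    // A minimal sketch of decoding the audio-delta events above with Codable instead of
    // JSONSerialization. RealtimeAudioDelta is a hypothetical local type matching the
    // JSON shape handled in handleReceivedText, not part of any SDK.
    private struct RealtimeAudioDelta: Decodable {
        let type: String
        let delta: String?
    }

    private func decodeAudioDelta(from text: String) -> RealtimeAudioDelta? {
        guard let data = text.data(using: .utf8) else { return nil }
        return try? JSONDecoder().decode(RealtimeAudioDelta.self, from: data)
    }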
|
|
|
    private func handleAudioDelta(_ base64Audio: String) {
        guard let audioData = Data(base64Encoded: base64Audio) else {
            print("Failed to decode Base64 audio data")
            return
        }
        accumulatedAudioData.append(audioData)
    }
|
    private func convertAccumulatedAudioToFile() {
        // The Realtime API streams PCM16 mono at 24 kHz by default.
        let sampleRate = 24000.0
        let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: sampleRate, channels: 1, interleaved: false)!

        let documentsPath = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
        let outputURL = documentsPath.appendingPathComponent("responseAudio.wav")

        do {
            let audioFile = try AVAudioFile(forWriting: outputURL, settings: format.settings)

            // Each PCM16 sample is 2 bytes, so the frame count is the byte count / 2.
            let frameCount = UInt32(accumulatedAudioData.count / MemoryLayout<Int16>.size)
            let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount)!
            buffer.frameLength = frameCount

            // Convert each Int16 sample to a normalized Float32 in [-1, 1].
            accumulatedAudioData.withUnsafeBytes { rawBufferPointer in
                let int16BufferPointer = rawBufferPointer.bindMemory(to: Int16.self)
                for i in 0..<Int(frameCount) {
                    buffer.floatChannelData?[0][i] = Float(int16BufferPointer[i]) / Float(Int16.max)
                }
            }

            try audioFile.write(from: buffer)

            DispatchQueue.main.async {
                self.responseAudioURL = outputURL
            }
        } catch {
            print("Failed to save response audio: \(error)")
        }

        accumulatedAudioData = Data()
    }
|
    private func toggleRecording() {
        if isRecording {
            stopRecording()
        } else {
            setupWebSocket()
            startRecording()
        }
    }
|
    private func startRecording() {
        let inputNode = audioEngine.inputNode
        let recordingFormat = inputNode.outputFormat(forBus: 0)

        let documentsPath = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
        let fileURL = documentsPath.appendingPathComponent("recordedAudio.wav")

        let file: AVAudioFile
        do {
            file = try AVAudioFile(forWriting: fileURL, settings: recordingFormat.settings)
        } catch {
            print("Failed to create recording file: \(error)")
            return
        }
        recordedAudioURL = fileURL

        // Tap the microphone: write each buffer to disk and stream it to the API.
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
            try? file.write(from: buffer)
            self.sendAudioData(buffer)
        }

        audioEngine.prepare()
        do {
            try audioEngine.start()
            isRecording = true
        } catch {
            print("Failed to start audio engine: \(error)")
        }
    }
|
    private func stopRecording() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        isRecording = false
        // The WebSocket is left open so "Send" can still commit the buffered audio and
        // request a response; setupWebSocket() replaces it on the next recording.
    }
|
    private func pcmBufferToBase64(pcmBuffer: AVAudioPCMBuffer) -> String? {
        guard let channelData = pcmBuffer.floatChannelData else {
            return nil
        }

        // Non-interleaved buffer: channelData[0] points at channel 0's Float32 samples.
        let frameLength = Int(pcmBuffer.frameLength)
        let dataSize = frameLength * MemoryLayout<Float>.size

        // Create a Data object from the first channel and Base64-encode it.
        let data = Data(bytes: channelData[0], count: dataSize)
        return data.base64EncodedString()
    }
|
    private func sendAudioData(_ buffer: AVAudioPCMBuffer) {
        guard isConnected else {
            print("WebSocket is not connected. Cannot send audio data.")
            return
        }

        guard let base64Audio = self.pcmBufferToBase64(pcmBuffer: buffer) else {
            print("Failed to convert PCM buffer to Base64 string.")
            return
        }

        // Note: this sends Float32 samples at the hardware sample rate, while the Realtime
        // API's default input format is 24 kHz mono PCM16; see the convertToPCM16 sketch
        // below for one way to bridge the gap.
        let event: [String: Any] = [
            "type": "input_audio_buffer.append",
            "audio": base64Audio
        ]

        if let jsonData = try? JSONSerialization.data(withJSONObject: event),
           let jsonString = String(data: jsonData, encoding: .utf8) {
            webSocketTask?.send(.string(jsonString)) { error in
                if let error = error {
                    print("Error sending message: \(error)")
                }
            }
        }
    }
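
    // A minimal sketch of downsampling a microphone buffer to 24 kHz mono PCM16 with
    // AVAudioConverter before Base64-encoding it, as referenced in sendAudioData above.
    // convertToPCM16 is an added helper and an assumption about how to bridge the format
    // gap; the original code streams Float32 samples at the hardware rate unchanged.
    private func convertToPCM16(buffer: AVAudioPCMBuffer) -> Data? {
        guard let targetFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                               sampleRate: 24000,
                                               channels: 1,
                                               interleaved: true),
              let converter = AVAudioConverter(from: buffer.format, to: targetFormat) else {
            return nil
        }

        // Size the output for the sample-rate ratio, with headroom for converter latency.
        let ratio = targetFormat.sampleRate / buffer.format.sampleRate
        let capacity = AVAudioFrameCount(Double(buffer.frameLength) * ratio) + 1024
        guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: capacity) else {
            return nil
        }

        // Feed the input buffer exactly once, then report that no more data is available.
        var consumed = false
        var conversionError: NSError?
        converter.convert(to: outputBuffer, error: &conversionError) { _, outStatus in
            if consumed {
                outStatus.pointee = .noDataNow
                return nil
            }
            consumed = true
            outStatus.pointee = .haveData
            return buffer
        }
        if conversionError != nil { return nil }

        guard let int16Data = outputBuffer.int16ChannelData else { return nil }
        return Data(bytes: int16Data[0], count: Int(outputBuffer.frameLength) * MemoryLayout<Int16>.size)
    }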
|
|
|
    private func playRecordedAudio() {
        if isRecording {
            stopRecording()
        }

        guard let audioURL = recordedAudioURL else {
            print("No recorded audio URL found.")
            return
        }

        do {
            let audioFile = try AVAudioFile(forReading: audioURL)

            let playerNode = AVAudioPlayerNode()
            audioEngine.attach(playerNode)
            audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: audioFile.processingFormat)

            playerNode.scheduleFile(audioFile, at: nil)

            if !audioEngine.isRunning {
                try audioEngine.start()
            }

            playerNode.play()
        } catch {
            print("Failed to play recorded audio: \(error)")
        }
    }
|
    private func playResponseAudio() {
        if isRecording {
            stopRecording()
        }

        guard let audioURL = responseAudioURL else {
            print("No response audio URL found.")
            return
        }

        do {
            let audioFile = try AVAudioFile(forReading: audioURL)
            let mainMixerNode = audioEngine.mainMixerNode
            let outputFormat = mainMixerNode.outputFormat(forBus: 0)

            let playerNode = AVAudioPlayerNode()
            audioEngine.attach(playerNode)
            audioEngine.connect(playerNode, to: mainMixerNode, format: outputFormat)

            playerNode.scheduleFile(audioFile, at: nil)

            if !audioEngine.isRunning {
                try audioEngine.start()
            }

            playerNode.play()
        } catch {
            print("Failed to play response audio: \(error)")
        }
    }
|
    private func commitAndCreateResponse() {
        guard isConnected else {
            print("WebSocket is not connected. Cannot commit and create response.")
            return
        }

        let instructions = "Please assist the user."

        // Commit the appended input audio, then ask the server to generate a response.
        let commitEvent: [String: Any] = [
            "type": "input_audio_buffer.commit"
        ]
        let responseEvent: [String: Any] = [
            "type": "response.create",
            "response": [
                "modalities": ["audio", "text"],
                "instructions": instructions
            ]
        ]

        [commitEvent, responseEvent].forEach { event in
            if let jsonData = try? JSONSerialization.data(withJSONObject: event),
               let jsonString = String(data: jsonData, encoding: .utf8) {
                webSocketTask?.send(.string(jsonString)) { error in
                    if let error = error {
                        print("Error sending message: \(error)")
                    }
                }
            }
        }
    }
|
}

#Preview {
    ContentView()
}