Created June 11, 2025 20:21
```ai_cubit.dart
import 'dart:async';
import 'dart:developer';
import 'dart:typed_data';

import 'package:bloc/bloc.dart';
import 'package:firebase_ai/firebase_ai.dart';
import 'package:firebase_auth/firebase_auth.dart';
import 'package:record/record.dart';

import 'audio_stream_manager.dart';

part 'ai_state.dart';

/// The Cubit responsible for handling all logic related to
/// real-time AI voice chat, including recording, streaming,
/// sending audio to Vertex AI, and playing AI responses.
class AICubit extends Cubit<AIState> {
  // Constructor: immediately initializes AI and session on creation.
  AICubit() : super(const AIState()) {
    initialize();
  }
  /// Vertex AI instance using Firebase Auth for secure access.
  final vertexAI = FirebaseAI.vertexAI(auth: FirebaseAuth.instance);

  /// Configuration for live AI generation:
  /// - We want audio responses (ResponseModalities.audio)
  /// - 'Kore' is the voice name to use
  final generationConfig = LiveGenerationConfig(
    responseModalities: [ResponseModalities.audio],
    speechConfig: SpeechConfig(voiceName: 'Kore'),
  );

  // The system instruction sets the personality and boundaries for the AI.
  late final Content systemInstruction;

  // The actual live generative model instance from Vertex AI.
  late final LiveGenerativeModel model;

  // The session for continuous streaming with Vertex AI.
  late LiveSession _session;

  // For recording user audio.
  final _audioRecorder = AudioRecorder();

  // For playing back AI audio responses.
  final _audioManager = AudioStreamManager();

  // Builds up audio data to batch before playback.
  final _chunkBuilder = BytesBuilder();

  // Counter for chunk batching.
  var _audioIndex = 0;

  // Controls stopping of the background AI message listener.
  StreamController<bool> _stopController = StreamController<bool>();

  // Indicates whether the microphone data should be forwarded to the AI.
  // We keep the recorder running for fast resume but selectively drop chunks
  // when the AI itself is speaking to avoid echo loops on some platforms.
  bool _micEnabled = true;

  // Indicates whether the AI is currently speaking so that the UI can react
  // and developers can debug.
  bool _aiSpeaking = false;

  // Tracks the remaining playback time (in ms) for the audio that was already
  // queued in the player so we can resume the mic only after it ends.
  int _pendingPlaybackMs = 0;

  // 24,000 samples/s * 1 channel * 2 bytes per sample = 48,000 bytes/s = 48 bytes/ms.
  static const int _bytesPerMs = 48;
  // Convenience helpers for logging & state updates.
  Future<void> _pauseRecordingWithLog() async {
    if (!_aiSpeaking) {
      log('[AI_CUBIT] Pausing mic – AI started speaking');
      _aiSpeaking = true;
      _micEnabled = false;
      _pendingPlaybackMs = 0; // reset counter for this turn
      try {
        await _audioRecorder.pause();
      } catch (e) {
        // Some platforms may not implement pause. We ignore and rely on gating.
        log('[AI_CUBIT] Recorder pause not supported: $e');
      }
      emit(state.copyWith(aiSpeaking: true));
    }
  }

  Future<void> _resumeRecordingWithLog() async {
    if (_aiSpeaking) {
      log('[AI_CUBIT] Resuming mic – AI turn complete');
      _aiSpeaking = false;
      // Add a short delay to ensure the last playback buffer is finished.
      await Future.delayed(const Duration(milliseconds: 300));
      _micEnabled = true;
      try {
        await _audioRecorder.resume();
      } catch (e) {
        log('[AI_CUBIT] Recorder resume not supported: $e');
      }
      emit(state.copyWith(aiSpeaking: false));
    }
  }
  /// Entry point: sets up the AI model and opens the live session.
  void initialize() {
    _setupAI();
    unawaited(_setupSession());
  }

  /// Toggles recording based on the current state.
  Future<void> toggleRecording() async {
    log('Toggling recording state: ${state.recording}');
    if (state.recording) {
      await _stopRecording();
    } else {
      await _startRecording();
    }
  }

  /// Sets up the AI model with system instructions and config.
  void _setupAI() {
    const String instructions =
        'Your name is Gemini, the AI assistant. Your role is to assist the user within the app only.';
    systemInstruction = Content.system(instructions);

    // Initializes the live generative model with the desired configuration.
    model = vertexAI.liveGenerativeModel(
      model: 'gemini-2.0-flash-exp',
      liveGenerationConfig: generationConfig,
      systemInstruction: systemInstruction,
    );
  }
  /// Starts audio recording and streams audio chunks to the Vertex AI session.
  Future<void> _startRecording() async {
    emit(state.copyWith(recording: true));
    try {
      // Prompts for mic permission if needed; the result is checked again
      // inside startRecordingStream before any audio is captured.
      await _audioRecorder.hasPermission();
      final audioRecordStream = startRecordingStream();

      // Convert each PCM chunk to the format expected by Vertex AI.
      final mediaChunkStream = audioRecordStream.map((data) {
        log('Received audio data chunk of size: ${data.length}');
        return InlineDataPart('audio/pcm', data);
      });

      // Send the stream of audio to the AI session.
      await _session.sendMediaStream(mediaChunkStream);
    } catch (e) {
      log('Error sending media stream: $e');
    }
  }
  /// Handles session lifecycle: opens a new session, or closes the current one.
  Future<void> _setupSession() async {
    if (!state.sessionOpening) {
      // Start a new AI session if one isn't open.
      _session = await model.connect();
      emit(state.copyWith(sessionOpening: true));
      _stopController = StreamController<bool>();
      await _audioManager.initPlayer(); // Prepares audio player for streaming.

      // Start background task to handle incoming AI messages.
      unawaited(
        processMessagesContinuously(
          stopSignal: _stopController,
        ),
      );
    } else {
      // If a session is open, close everything cleanly.
      _stopController.add(true);
      await _stopController.close();
      await _session.close();
      await _audioManager.disposePlayer();
      emit(state.copyWith(sessionOpening: false));
    }
  }
  /// Continuously listens for AI server messages until a stop signal is received.
  Future<void> processMessagesContinuously({
    required StreamController<bool> stopSignal,
  }) async {
    bool shouldContinue = true;

    // Listen for an external stop signal to break the loop.
    stopSignal.stream.listen((stop) {
      if (stop) {
        shouldContinue = false;
      }
    });

    // Main loop: receive messages from the AI and process them.
    while (shouldContinue) {
      try {
        await for (final message in _session.receive()) {
          await _handleLiveServerMessage(message);
        }
      } catch (e) {
        // On error (disconnect), mark the session as closed.
        emit(state.copyWith(sessionOpening: false));
        log(e.toString());
        break;
      }

      // Short delay prevents tight busy-wait loops.
      await Future.delayed(const Duration(milliseconds: 100));
    }
  }
  /// Main handler for responses from Vertex AI.
  Future<void> _handleLiveServerMessage(LiveServerResponse response) async {
    final message = response.message;
    if (message is LiveServerContent) {
      // If the AI is actively responding, pause the mic to prevent overlap.
      if (message.modelTurn != null) {
        await _pauseRecordingWithLog();
        await _handleLiveServerContent(message);
      }

      // When the turn is done, flush remaining audio; the mic resume is
      // scheduled by _handleTurnComplete after playback ends.
      if (message.turnComplete ?? false) {
        log('Turn complete: $response');
        await _handleTurnComplete();
      }

      // Handle interruptions if needed.
      if (message.interrupted ?? false) {
        log('Interrupted: $response');
      }
    }
  }
  /// Parses live server content and processes each audio part.
  Future<void> _handleLiveServerContent(LiveServerContent response) async {
    final partList = response.modelTurn?.parts;
    if (partList != null) {
      for (final part in partList) {
        if (part is InlineDataPart) {
          await _handleInlineDataPart(part);
        }
      }
    }
  }

  /// Handles each chunk of AI audio by buffering and sending it to the player.
  Future<void> _handleInlineDataPart(InlineDataPart part) async {
    if (part.mimeType.startsWith('audio')) {
      _chunkBuilder.add(part.bytes);
      _audioIndex++;

      // Estimate the playback duration of this chunk and accumulate it.
      final int durationMs = (part.bytes.length / _bytesPerMs).round();
      _pendingPlaybackMs += durationMs;
      log('Received audio chunk: ${part.bytes.length} bytes (~$durationMs ms)');

      // Feed the player every 5 chunks for smoother streaming audio.
      if (_audioIndex >= 5) {
        _audioManager.feedAudioChunk(_chunkBuilder.toBytes());
        _chunkBuilder.clear();
        _audioIndex = 0;
      }
    }
  }
  /// When the AI's response turn is over, flush any remaining audio.
  Future<void> _handleTurnComplete() async {
    if (_chunkBuilder.isNotEmpty) {
      _audioManager.feedAudioChunk(_chunkBuilder.toBytes());
      // Include the last buffer's duration as well.
      final int durationMs = (_chunkBuilder.length / _bytesPerMs).round();
      _pendingPlaybackMs += durationMs;
      _audioIndex = 0;
      _chunkBuilder.clear();
    }

    // After flushing, schedule the mic resume once the remaining playback time has elapsed.
    _scheduleResumeAfterPlayback();
  }

  void _scheduleResumeAfterPlayback() {
    // Add a small cushion so playback has fully drained before the mic resumes.
    const int cushionMs = 200;
    final int delayMs = _pendingPlaybackMs + cushionMs;
    log('[AI_CUBIT] Scheduling mic resume in $delayMs ms');
    Future.delayed(Duration(milliseconds: delayMs), () async {
      // Only resume if we are still in AI-speaking mode.
      if (_aiSpeaking) {
        await _resumeRecordingWithLog();
      }
    });
  }
  /// Opens a PCM audio stream for real-time recording.
  Stream<Uint8List> startRecordingStream() async* {
    if (await _audioRecorder.hasPermission()) {
      const recordConfig = RecordConfig(
        encoder: AudioEncoder.pcm16bits,
        sampleRate: 16000, // Match the AI model's input requirements.
        numChannels: 1,
      );
      final stream = await _audioRecorder.startStream(recordConfig);
      await for (final data in stream) {
        // Only forward microphone data when the mic is enabled, to avoid echo.
        if (_micEnabled) {
          yield data;
        }
      }
    } else {
      log('No permission to record audio');
    }
  }

  /// Stops audio recording and updates state.
  Future<void> _stopRecording() async {
    emit(state.copyWith(recording: false));
    await _audioRecorder.stop();
  }

  /// Ensures all resources are properly cleaned up on dispose.
  @override
  Future<void> close() async {
    await _audioRecorder.dispose();
    await _audioManager.disposePlayer();
    await _session.close();
    await _stopController.close();
    return super.close();
  }
}
```
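The cubit above only logs interruptions. If you want a barge-in to actually cut the turn short, one minimal option is to drop the audio that is still being batched and hand the mic back right away. This is an untested sketch built only from members the cubit already defines; it does not cancel audio that has already been queued in the player:

```dart
// Sketch: a possible extension of _handleLiveServerMessage in ai_cubit.dart.
if (message.interrupted ?? false) {
  log('Interrupted: $response');
  _chunkBuilder.clear();    // discard chunks that were still being batched
  _audioIndex = 0;
  _pendingPlaybackMs = 0;   // don't wait for playback that will no longer happen
  await _resumeRecordingWithLog();
}
```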
```ai_state.dart
part of 'ai_cubit.dart';

/// The state model for our AI Cubit.
/// Tracks everything the UI needs to know about the assistant's session and recording status.
class AIState {
  /// True if a live session with Vertex AI is active.
  final bool sessionOpening;

  /// True if the user is currently recording (microphone is on).
  final bool recording;

  /// True if the AI is currently sending an audio response.
  final bool aiSpeaking;

  /// Default constructor with all values false.
  const AIState({
    this.sessionOpening = false,
    this.recording = false,
    this.aiSpeaking = false,
  });

  /// Helper method to create a new state object
  /// with one or more fields changed, keeping the others the same.
  AIState copyWith({
    bool? sessionOpening,
    bool? recording,
    bool? aiSpeaking,
  }) {
    return AIState(
      sessionOpening: sessionOpening ?? this.sessionOpening,
      recording: recording ?? this.recording,
      aiSpeaking: aiSpeaking ?? this.aiSpeaking,
    );
  }
}
```
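One design note on `AIState`: it does not override `==` or `hashCode`, so every `copyWith` emit is treated as a new state and triggers a rebuild even when no field actually changed. If that matters, a common option is to base the state on `package:equatable`. The sketch below assumes you add `equatable` to `pubspec.yaml`; and since `ai_state.dart` is a `part of 'ai_cubit.dart'`, the import would live in `ai_cubit.dart`:

```dart
// Sketch: an Equatable-based variant of AIState (equatable is an assumed extra dependency).
import 'package:equatable/equatable.dart'; // move to ai_cubit.dart if kept as a part file

class AIState extends Equatable {
  const AIState({
    this.sessionOpening = false,
    this.recording = false,
    this.aiSpeaking = false,
  });

  final bool sessionOpening;
  final bool recording;
  final bool aiSpeaking;

  // Equatable derives == and hashCode from props, so Cubit.emit can
  // skip states whose values are identical to the current one.
  @override
  List<Object?> get props => [sessionOpening, recording, aiSpeaking];

  AIState copyWith({bool? sessionOpening, bool? recording, bool? aiSpeaking}) {
    return AIState(
      sessionOpening: sessionOpening ?? this.sessionOpening,
      recording: recording ?? this.recording,
      aiSpeaking: aiSpeaking ?? this.aiSpeaking,
    );
  }
}
```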
```audio_stream_manager.dart
import 'dart:async';
import 'dart:typed_data';

import 'package:flutter_sound/flutter_sound.dart';

/// Handles all audio playback duties for streaming AI audio responses.
/// This class manages a low-latency audio player that can play audio chunks as soon as
/// they're received, ensuring near real-time voice interaction with the AI.
class AudioStreamManager {
  // The FlutterSoundPlayer instance that streams and plays audio data.
  final _player = FlutterSoundPlayer();

  /// Initializes the audio player and configures it to accept live PCM stream data.
  Future<void> initPlayer() async {
    await _player.openPlayer();
    await _player.startPlayerFromStream(
      codec: Codec.pcm16, // Use PCM 16-bit encoding for raw audio data.
      numChannels: 1, // Mono audio (one channel).
      sampleRate: 24000, // 24kHz sample rate to match AI output.
      interleaved: true, // Audio bytes are interleaved; typical for PCM streams.
      bufferSize: 1024, // Buffer size in bytes; adjust for the best latency/performance balance.
    );
  }

  /// Feeds a chunk of audio data (from the AI) to the player for immediate playback.
  /// Called repeatedly as chunks arrive during the AI's audio response.
  void feedAudioChunk(Uint8List chunk) {
    _player.uint8ListSink?.add(chunk);
  }

  /// Stops audio playback (but keeps the player open, for resuming later).
  Future<void> stopPlayer() async {
    await _player.stopPlayer();
  }

  /// Fully disposes and closes the audio player (should be called on session or app close).
  Future<void> disposePlayer() async {
    await _player.closePlayer();
  }
}
```
```voice_assistant_page.dart
import 'package:flutter/material.dart';
import 'package:flutter_bloc/flutter_bloc.dart';

import 'ai_cubit.dart';

class VoiceAssistantPage extends StatelessWidget {
  const VoiceAssistantPage({super.key});

  @override
  Widget build(BuildContext context) {
    return BlocProvider(
      create: (_) => AICubit(),
      child: Scaffold(
        appBar: AppBar(title: const Text('AI Voice Assistant')),
        body: BlocBuilder<AICubit, AIState>(
          builder: (context, state) {
            final cubit = context.read<AICubit>();
            return Center(
              child: Column(
                mainAxisAlignment: MainAxisAlignment.center,
                children: [
                  ElevatedButton.icon(
                    icon: Icon(state.recording ? Icons.stop : Icons.mic),
                    label: Text(state.recording ? 'Stop' : 'Record'),
                    onPressed: cubit.toggleRecording,
                  ),
                  const SizedBox(height: 24),
                  Text(state.sessionOpening
                      ? 'Session active'
                      : 'Session not active'),
                  const SizedBox(height: 12),
                  Text(state.aiSpeaking ? 'AI speaking…' : 'AI listening'),
                ],
              ),
            );
          },
        ),
      ),
    );
  }
}
```
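For completeness, this is roughly how the page could be wired into an app. Firebase has to be initialized before `AICubit` is created, because the cubit reads `FirebaseAuth.instance` and `FirebaseAI.vertexAI` in its field initializers; and since it passes Firebase Auth to Vertex AI, some sign-in flow (for example anonymous auth) is presumably expected as well. This is a sketch, not part of the gist; `firebase_options.dart` is the file generated by `flutterfire configure`:

```main.dart
// A minimal sketch of the app entry point (not part of the original gist).
import 'package:firebase_core/firebase_core.dart';
import 'package:flutter/material.dart';

import 'firebase_options.dart'; // assumed to be generated by `flutterfire configure`
import 'voice_assistant_page.dart';

Future<void> main() async {
  WidgetsFlutterBinding.ensureInitialized();
  // Firebase must be ready before VoiceAssistantPage creates AICubit.
  await Firebase.initializeApp(options: DefaultFirebaseOptions.currentPlatform);
  runApp(const MaterialApp(home: VoiceAssistantPage()));
}
```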