```ai_cubit.dart
import 'dart:async';
import 'dart:developer';
import 'dart:typed_data';
import 'package:bloc/bloc.dart';
import 'package:firebase_ai/firebase_ai.dart';
import 'package:firebase_auth/firebase_auth.dart';
import 'package:record/record.dart';
import 'audio_stream_manager.dart';
part 'ai_state.dart';
/// The Cubit responsible for handling all logic related to
/// real-time AI voice chat, including recording, streaming,
/// sending audio to Vertex AI, and playing AI responses.
class AICubit extends Cubit<AIState> {
// Constructor: immediately initializes AI and session on creation.
AICubit() : super(const AIState()) {
unawaited(initialize());
}
/// Vertex AI instance using Firebase Auth for secure access.
final vertexAI = FirebaseAI.vertexAI(auth: FirebaseAuth.instance);
/// Configuration for live AI generation:
/// - We want audio responses (ResponseModalities.audio)
/// - 'Kore' is the voice name to use
final generationConfig = LiveGenerationConfig(
responseModalities: [ResponseModalities.audio],
speechConfig: SpeechConfig(voiceName: 'Kore'),
);
// The system instruction sets the personality and boundaries for the AI.
late final Content systemInstruction;
// The actual live generative model instance from Vertex AI.
late final LiveGenerativeModel model;
// The session for continuous streaming with Vertex AI.
late LiveSession _session;
// For recording user audio.
final _audioRecorder = AudioRecorder();
// For playing back AI audio responses.
final _audioManager = AudioStreamManager();
// Builds up audio data to batch before playback.
final _chunkBuilder = BytesBuilder();
// Counter for chunk batching.
var _audioIndex = 0;
// Controls stopping of the background AI message listener.
StreamController<bool> _stopController = StreamController<bool>();
// Indicates whether the microphone data should be forwarded to the AI.
// We keep the recorder running for fast resume but selectively drop chunks
// when the AI itself is speaking to avoid echo-loops on some platforms.
bool _micEnabled = true;
// Indicates whether the AI is currently speaking so that the UI can react
// and developers can debug.
bool _aiSpeaking = false;
// Tracks the remaining playback time (in ms) for the audio that was already
// queued in the player so we can resume the mic only after it ends.
int _pendingPlaybackMs = 0;
static const int _bytesPerMs = 48; // 24kHz * 1 channel * 2 bytes / 1000
// Convenience helpers for logging & state updates.
Future<void> _pauseRecordingWithLog() async {
if (!_aiSpeaking) {
log('[AI_CUBIT] Pausing mic – AI started speaking');
_aiSpeaking = true;
_micEnabled = false;
_pendingPlaybackMs = 0; // reset counter for this turn
try {
await _audioRecorder.pause();
} catch (e) {
// Some platforms may not implement pause. We ignore and rely on gating.
log('[AI_CUBIT] Recorder pause not supported: $e');
}
emit(state.copyWith(aiSpeaking: true));
}
}
Future<void> _resumeRecordingWithLog() async {
if (_aiSpeaking) {
log('[AI_CUBIT] Resuming mic – AI turn complete');
_aiSpeaking = false;
// Add a short delay to ensure the last playback buffer is finished.
await Future.delayed(const Duration(milliseconds: 300));
_micEnabled = true;
try {
await _audioRecorder.resume();
} catch (e) {
log('[AI_CUBIT] Recorder resume not supported: $e');
}
emit(state.copyWith(aiSpeaking: false));
}
}
/// Entry point: Sets up the AI and starts the session.
Future<void> initialize() async {
_setupAI();
await _setupSession();
}
/// Toggles recording state based on the current state.
Future<void> toggleRecording() async {
log("Toggling recording state: ${state.recording}");
if (state.recording) {
await _stopRecording();
} else {
await _startRecording();
}
}
/// Sets up the AI model with system instructions and config.
void _setupAI() {
final String instructions =
"Your name is Gemini, the AI assistant. Your role is to assist the user within the app only.";
systemInstruction = Content.system(instructions);
// Initializes the live generative model with the desired configuration.
model = vertexAI.liveGenerativeModel(
model: 'gemini-2.0-flash-exp',
liveGenerationConfig: generationConfig,
systemInstruction: systemInstruction,
);
}
/// Starts audio recording, streams audio chunks to Vertex AI session.
Future<void> _startRecording() async {
emit(state.copyWith(recording: true));
try {
// Bail out if mic permission is denied instead of ignoring the result.
if (!await _audioRecorder.hasPermission()) {
log("No permission to record audio");
emit(state.copyWith(recording: false));
return;
}
final audioRecordStream = startRecordingStream();
// Convert each PCM chunk to the format expected by Vertex AI
final mediaChunkStream = audioRecordStream.map((data) {
log("Received audio data chunk of size: ${data.length}");
return InlineDataPart('audio/pcm', data);
});
// Send the stream of audio to the AI session.
await _session.sendMediaStream(mediaChunkStream);
} catch (e) {
log("Error sending media stream: $e");
}
}
/// Handles session lifecycle: starts or stops session as needed.
Future<void> _setupSession() async {
if (!state.sessionOpening) {
// Start a new AI session if one isn't open.
_session = await model.connect();
emit(state.copyWith(sessionOpening: true));
_stopController = StreamController<bool>();
await _audioManager.initPlayer(); // Prepares audio player for streaming
// Start background task to handle incoming AI messages.
unawaited(
processMessagesContinuously(
stopSignal: _stopController,
),
);
} else {
// If session is open, close everything cleanly.
_stopController.add(true);
await _stopController.close();
await _session.close();
_audioManager.disposePlayer();
emit(state.copyWith(sessionOpening: false));
}
}
/// Continuously listens for AI server messages until a stop signal is received.
Future<void> processMessagesContinuously({
required StreamController<bool> stopSignal,
}) async {
bool shouldContinue = true;
// Listen for external stop signal to break the loop.
stopSignal.stream.listen((stop) {
if (stop) {
shouldContinue = false;
}
});
// Main loop: receive messages from the AI and process.
while (shouldContinue) {
try {
await for (final message in _session.receive()) {
await _handleLiveServerMessage(message);
}
} catch (e) {
// On error (disconnect), mark session as closed.
emit(state.copyWith(sessionOpening: false));
log(e.toString());
break;
}
// Short delay prevents tight busy-wait loops.
await Future.delayed(const Duration(milliseconds: 100));
}
}
/// Main handler for responses from Vertex AI.
Future<void> _handleLiveServerMessage(LiveServerResponse response) async {
final message = response.message;
if (message is LiveServerContent) {
// If AI is actively responding, pause the mic to prevent overlap.
if (message.modelTurn != null) {
await _pauseRecordingWithLog();
await _handleLiveServerContent(message);
}
// When the turn is done, resume mic for next input.
if (message.turnComplete != null && message.turnComplete!) {
log('Turn complete: $response');
await _handleTurnComplete();
// Resume will be scheduled by _handleTurnComplete after playback ends.
}
// Handle interruptions if needed.
if (message.interrupted != null && message.interrupted!) {
log('Interrupted: $response');
}
}
}
/// Parses live server content and processes each audio part.
Future<void> _handleLiveServerContent(LiveServerContent response) async {
final partList = response.modelTurn?.parts;
if (partList != null) {
for (final part in partList) {
if (part is InlineDataPart) {
await _handleInlineDataPart(part);
}
}
}
}
/// Handles each chunk of AI audio by buffering and sending to the player.
Future<void> _handleInlineDataPart(InlineDataPart part) async {
if (part.mimeType.startsWith('audio')) {
_chunkBuilder.add(part.bytes);
_audioIndex++;
// Calculate approximate playback duration for this chunk and accumulate.
final int durationMs = (part.bytes.length / _bytesPerMs).round();
_pendingPlaybackMs += durationMs;
log('Received audio chunk of ${part.bytes.length} bytes');
// Play every 5 chunks for smoother streaming audio
if (_audioIndex == 5) {
_audioManager.feedAudioChunk(_chunkBuilder.toBytes());
_chunkBuilder.clear();
_audioIndex = 0;
}
}
}
/// When the AI's response turn is over, flush any remaining audio.
Future<void> _handleTurnComplete() async {
if (_chunkBuilder.isNotEmpty) {
_audioManager.feedAudioChunk(_chunkBuilder.toBytes());
// Include last buffer duration as well.
final int durationMs = (_chunkBuilder.length / _bytesPerMs).round();
_pendingPlaybackMs += durationMs;
_audioIndex = 0;
_chunkBuilder.clear();
}
// After flushing, schedule mic resume after the remaining playback time.
_scheduleResumeAfterPlayback();
}
void _scheduleResumeAfterPlayback() {
// Add a small cushion to ensure clean cutoff
const int cushionMs = 200;
final int delayMs = _pendingPlaybackMs + cushionMs;
log('[AI_CUBIT] Scheduling mic resume in $delayMs ms');
Future.delayed(Duration(milliseconds: delayMs), () async {
// Only resume if we are still in AI speaking mode
if (_aiSpeaking) {
await _resumeRecordingWithLog();
}
});
}
/// Opens a PCM audio stream for real-time recording.
Stream<Uint8List> startRecordingStream() async* {
if (await _audioRecorder.hasPermission()) {
const recordConfig = RecordConfig(
encoder: AudioEncoder.pcm16bits,
sampleRate: 16000, // Match AI model input requirements
numChannels: 1,
);
final stream = await _audioRecorder.startStream(recordConfig);
await for (final data in stream) {
// Only forward microphone data when mic is enabled to avoid echo.
if (_micEnabled) {
yield data;
}
}
} else {
log("No permission to record audio");
}
}
/// Stops audio recording and updates state.
Future<void> _stopRecording() async {
emit(state.copyWith(recording: false));
await _audioRecorder.stop();
}
/// Ensures all resources are properly cleaned up on dispose.
@override
Future<void> close() async {
await _audioRecorder.dispose();
await _audioManager.disposePlayer();
await _session.close();
await _stopController.close();
return super.close();
}
}
```
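The mic-gating logic above hinges on one number: how many bytes of raw PCM correspond to one millisecond of playback. Here is a quick sketch of that arithmetic, generalized beyond the hard-coded `_bytesPerMs = 48` (24 kHz, mono, 16-bit); the helper names `bytesPerMs` and `chunkDurationMs` are illustrative only and not part of the gist:

```dart
/// Bytes of raw PCM per millisecond of audio.
/// 24 kHz * 1 channel * 2 bytes per sample / 1000 ms = 48, matching
/// the `_bytesPerMs` constant used by AICubit.
int bytesPerMs({
  required int sampleRate,
  required int channels,
  int bitsPerSample = 16,
}) =>
    (sampleRate * channels * (bitsPerSample ~/ 8)) ~/ 1000;

/// Approximate playback duration of one chunk, mirroring how
/// `_pendingPlaybackMs` is accumulated in `_handleInlineDataPart`.
int chunkDurationMs(int byteLength) =>
    (byteLength / bytesPerMs(sampleRate: 24000, channels: 1)).round();
```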
```ai_state.dart
part of 'ai_cubit.dart';
/// The state model for our AI Cubit.
/// Tracks everything the UI needs to know about the assistant's session and recording status.
class AIState {
/// True if a live session with Vertex AI is active.
final bool sessionOpening;
/// True if the user is currently recording (microphone is on).
final bool recording;
/// True if the AI is currently sending an audio response.
final bool aiSpeaking;
/// Default constructor with all values false.
const AIState({
this.sessionOpening = false,
this.recording = false,
this.aiSpeaking = false,
});
/// Helper method to create a new state object
/// with one or more fields changed, keeping others the same.
AIState copyWith({
bool? sessionOpening,
bool? recording,
bool? aiSpeaking,
}) {
return AIState(
sessionOpening: sessionOpening ?? this.sessionOpening,
recording: recording ?? this.recording,
aiSpeaking: aiSpeaking ?? this.aiSpeaking,
);
}
}
```
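`AIState` has no value equality, so every `emit` is treated as a new state and `BlocBuilder` rebuilds unconditionally, even when nothing changed. A possible variant using the `equatable` package is sketched below; this is an assumption on top of the gist (it would also need to become a standalone file, or the import would have to move into `ai_cubit.dart`, since `part` files cannot declare imports):

```dart
import 'package:equatable/equatable.dart';

/// Same fields as the original AIState, plus value equality so that
/// re-emitting an identical state does not trigger a rebuild.
class AIState extends Equatable {
  const AIState({
    this.sessionOpening = false,
    this.recording = false,
    this.aiSpeaking = false,
  });

  final bool sessionOpening;
  final bool recording;
  final bool aiSpeaking;

  AIState copyWith({
    bool? sessionOpening,
    bool? recording,
    bool? aiSpeaking,
  }) =>
      AIState(
        sessionOpening: sessionOpening ?? this.sessionOpening,
        recording: recording ?? this.recording,
        aiSpeaking: aiSpeaking ?? this.aiSpeaking,
      );

  @override
  List<Object?> get props => [sessionOpening, recording, aiSpeaking];
}
```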
```audio_stream_manager.dart
import 'dart:async';
import 'dart:typed_data';
import 'package:flutter_sound/flutter_sound.dart';
/// Handles all audio playback duties for streaming AI audio responses.
/// This class manages a low-latency audio player that can play audio chunks as soon as they're received,
/// ensuring near real-time voice interaction with the AI.
class AudioStreamManager {
// The FlutterSoundPlayer instance that streams and plays audio data.
final _player = FlutterSoundPlayer();
/// Initializes the audio player and configures it to accept live PCM stream data.
Future<void> initPlayer() async {
await _player.openPlayer();
await _player.startPlayerFromStream(
codec: Codec.pcm16, // Use PCM 16-bit encoding for raw audio data.
numChannels: 1, // Mono audio (one channel).
sampleRate: 24000, // 24kHz sample rate to match AI output.
interleaved: true, // Audio bytes are interleaved; typical for PCM streams.
bufferSize: 1024, // Buffer size in bytes; adjust for best latency/performance balance.
);
}
/// Feeds a chunk of audio data (from the AI) to the player for immediate playback.
/// Called repeatedly as chunks arrive during the AI's audio response.
void feedAudioChunk(Uint8List chunk) {
_player.uint8ListSink?.add(chunk);
}
/// Stops audio playback (but keeps player open, for resuming later).
Future<void> stopPlayer() async {
await _player.stopPlayer();
}
/// Fully disposes and closes the audio player (should be called on session or app close).
Future<void> disposePlayer() async {
await _player.closePlayer();
}
}
```
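Because `AudioStreamManager` only wraps flutter_sound's stream player, it can be exercised without a live Vertex AI session. Below is a rough smoke test, assuming the class above is in scope (the 48 bytes/ms figure follows from the 24 kHz, mono, 16-bit format it is configured for):

```dart
import 'dart:typed_data';

import 'audio_stream_manager.dart';

/// Feeds ~250 ms of PCM16 silence into the player, waits for it to
/// drain, then tears the player down.
Future<void> playSilenceSample() async {
  final manager = AudioStreamManager();
  await manager.initPlayer();

  // 250 ms * 48 bytes/ms of zeroed samples = silence.
  manager.feedAudioChunk(Uint8List(250 * 48));

  await Future<void>.delayed(const Duration(milliseconds: 500));
  await manager.stopPlayer();
  await manager.disposePlayer();
}
```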
```voice_assistant_page.dart
import 'package:flutter/material.dart';
import 'package:flutter_bloc/flutter_bloc.dart';
import 'ai_cubit.dart';
class VoiceAssistantPage extends StatelessWidget {
const VoiceAssistantPage({super.key});
@override
Widget build(BuildContext context) {
return BlocProvider(
create: (_) => AICubit(),
child: Scaffold(
appBar: AppBar(title: Text("AI Voice Assistant")),
body: BlocBuilder<AICubit, AIState>(
builder: (context, state) {
final cubit = context.read<AICubit>();
return Center(
child: Column(
mainAxisAlignment: MainAxisAlignment.center,
children: [
ElevatedButton.icon(
icon: Icon(state.recording ? Icons.stop : Icons.mic),
label: Text(state.recording ? "Stop" : "Record"),
onPressed: cubit.toggleRecording,
),
SizedBox(height: 24),
Text(state.sessionOpening
? "Session active"
: "Session not active"),
SizedBox(height: 12),
Text(state.aiSpeaking ? "AI speaking…" : "AI listening"),
],
),
);
},
),
),
);
}
}
```
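`FirebaseAI.vertexAI` and `FirebaseAuth.instance` both require Firebase to be initialized before `AICubit` is created. A minimal bootstrap sketch, assuming `firebase_options.dart` has been generated with `flutterfire configure`:

```dart
import 'package:firebase_core/firebase_core.dart';
import 'package:flutter/material.dart';

import 'firebase_options.dart';
import 'voice_assistant_page.dart';

Future<void> main() async {
  WidgetsFlutterBinding.ensureInitialized();
  // Must complete before FirebaseAI / FirebaseAuth are touched.
  await Firebase.initializeApp(
    options: DefaultFirebaseOptions.currentPlatform,
  );
  runApp(MaterialApp(home: const VoiceAssistantPage()));
}
```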