Created
November 12, 2025 12:27
-
-
Save amosgyamfi/05c6c7af1ca547fee4d3f406df4427f0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import asyncio | |
| import logging | |
| from uuid import uuid4 | |
| from dotenv import load_dotenv | |
| from vision_agents.core.edge.types import User | |
| from vision_agents.core.agents import Agent | |
| from vision_agents.plugins import getstream, elevenlabs, smart_turn, gemini | |
# Load variables from a local .env file (if any) into the process environment
# before any plugin is constructed.
load_dotenv()

# Configure the root logger once at INFO level.
# NOTE(review): the format references %(call_id)s, which is not a standard
# LogRecord attribute; presumably vision_agents injects it into every record —
# confirm, otherwise records lacking it will fail to format.
logging.basicConfig(
    format="%(asctime)s %(levelname)s [call_id=%(call_id)s] %(name)s: %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
async def start_agent() -> None:
    """Run a demo voice agent using ElevenLabs Scribe v2 Realtime for STT.

    The agent is assembled from:
    - GetStream for edge/real-time communication
    - ElevenLabs for text-to-speech (TTS)
    - ElevenLabs Scribe v2 Realtime for speech-to-text (STT)
    - Gemini (``gemini-2.0-flash``) as the language model
    - Smart Turn for turn detection

    Flow: create the agent user, build a "default" call whose id is a random
    UUID, call ``edge.open_demo`` for it, join the call, send a greeting
    through the LLM, then await ``agent.finish()``.

    Requirements (environment variables):
    - ELEVENLABS_API_KEY
    - STREAM_API_KEY and STREAM_API_SECRET
    - GEMINI_API_KEY
    """
    agent = Agent(
        edge=getstream.Edge(),
        agent_user=User(name="Friendly AI", id="agent"),
        instructions=(
            "You're a general-purpose voice AI assistant. "
            "You can help with a wide range of tasks, from answering questions to "
            "providing information on a wide range of topics. "
            "You can also help with tasks such as setting reminders, creating "
            "to-do lists, and more."
        ),
        tts=elevenlabs.TTS(),  # ElevenLabs for text-to-speech (TTS)
        stt=elevenlabs.STT(),  # ElevenLabs Scribe v2 Realtime for speech-to-text (STT)
        llm=gemini.LLM("gemini-2.0-flash"),
        turn_detection=smart_turn.TurnDetection(),
    )

    await agent.create_user()

    # A fresh UUID per run gives every demo session its own call.
    call = agent.edge.client.video.call("default", str(uuid4()))
    await agent.edge.open_demo(call)

    with await agent.join(call):
        # Fixed pause before greeting; presumably lets the demo participant
        # connect first — TODO confirm.
        await asyncio.sleep(5)
        await agent.llm.simple_response(
            text=(
                "Hello! I'm using ElevenLabs Scribe v2 Realtime for speech-to-text. "
                "How can I help you today?"
            )
        )
        await agent.finish()
if __name__ == "__main__":
    # Script entry point: build the agent coroutine and run it to completion
    # with asyncio.run.
    demo_session = start_agent()
    asyncio.run(demo_session)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment