Skip to content

Instantly share code, notes, and snippets.

@amosgyamfi
Created November 12, 2025 12:27
Show Gist options
  • Select an option

  • Save amosgyamfi/05c6c7af1ca547fee4d3f406df4427f0 to your computer and use it in GitHub Desktop.

Select an option

Save amosgyamfi/05c6c7af1ca547fee4d3f406df4427f0 to your computer and use it in GitHub Desktop.
import asyncio
import logging
from uuid import uuid4
from dotenv import load_dotenv
from vision_agents.core.edge.types import User
from vision_agents.core.agents import Agent
from vision_agents.plugins import getstream, elevenlabs, smart_turn, gemini
# Load settings (API keys etc.) from a .env file into the process environment
# before the agent and its plugins are constructed further down.
load_dotenv()

# NOTE(review): the format string references a ``call_id`` record attribute
# that the standard library does not populate on its own; presumably the
# vision_agents package injects it into log records -- confirm.
logging.basicConfig(
    format="%(asctime)s %(levelname)s [call_id=%(call_id)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
async def start_agent() -> None:
    """Run a demo voice agent that uses ElevenLabs Scribe v2 Realtime STT.

    The agent is assembled from these Vision Agents components:

    - GetStream for edge/real-time communication
    - ElevenLabs for text-to-speech (TTS)
    - ElevenLabs Scribe v2 Realtime for speech-to-text (STT)
    - Gemini (``gemini-2.0-flash``) as the language model
    - Smart Turn for turn detection

    After construction the agent creates its user, opens the demo for a
    call with a freshly generated UUID, joins that call, waits five seconds,
    passes a greeting text to the LLM via ``simple_response`` and finally
    awaits ``agent.finish()``.

    Requirements:
    - ELEVENLABS_API_KEY environment variable
    - STREAM_API_KEY and STREAM_API_SECRET environment variables
    - GEMINI_API_KEY environment variable
    """
    agent = Agent(
        edge=getstream.Edge(),
        agent_user=User(name="Friendly AI", id="agent"),
        instructions="You're a general-purpose voice AI assistant. You can help with a wide range of tasks, from answering questions to providing information on a wide range of topics. You can also help with tasks such as setting reminders, creating to-do lists, and more.",
        tts=elevenlabs.TTS(),  # ElevenLabs for text-to-speech (TTS)
        stt=elevenlabs.STT(),  # ElevenLabs Scribe v2 Realtime for speech-to-text (STT)
        llm=gemini.LLM("gemini-2.0-flash"),
        turn_detection=smart_turn.TurnDetection(),
    )
    await agent.create_user()

    # Every run gets its own call: type "default", random UUID as the call id.
    call = agent.edge.client.video.call("default", str(uuid4()))
    await agent.edge.open_demo(call)

    with await agent.join(call):
        # NOTE(review): presumably this pause gives the human participant
        # time to connect before the agent speaks -- confirm.
        await asyncio.sleep(5)
        await agent.llm.simple_response(text="Hello! I'm using ElevenLabs Scribe v2 Realtime for speech-to-text. How can I help you today?")
        await agent.finish()
# Script entry point: run the agent coroutine only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(start_agent())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment