amosgyamfi · November 12, 2025 12:27
diff --git a/elevenlabs_scribe_v2_realtime_stt.py b/elevenlabs_scribe_v2_realtime_stt.py
 import asyncio
 import logging
 from uuid import uuid4

 from dotenv import load_dotenv

 from vision_agents.core.edge.types import User
 from vision_agents.core.agents import Agent
 from vision_agents.plugins import getstream, elevenlabs, smart_turn, gemini


 # Pull settings from a local .env file into the process environment before
 # any plugin is constructed.
 load_dotenv()

 # NOTE(review): the format references a `call_id` record attribute, which is
 # not one of the standard LogRecord fields. Presumably the vision_agents
 # package injects it into log records -- confirm.
 logging.basicConfig(
    level=logging.INFO,
    format=(
        "%(asctime)s %(levelname)s [call_id=%(call_id)s] "
        "%(name)s: %(message)s"
    ),
 )
 logger = logging.getLogger(__name__)


 async def start_agent() -> None:
    """Run a demo voice agent wired to ElevenLabs Scribe v2 Realtime STT.

    The agent is assembled from these plugins:
    - GetStream as the edge / real-time transport
    - ElevenLabs for text-to-speech (TTS)
    - ElevenLabs Scribe v2 Realtime for speech-to-text (STT)
    - Gemini ("gemini-2.0-flash") as the language model
    - smart_turn for turn detection

    Flow: create the agent user, create a call of type "default" with a
    random UUID as its id, open the demo for that call, join it, wait five
    seconds, send a greeting through the LLM, then await ``agent.finish()``.

    Requirements (as listed by the original example; confirm against each
    plugin's documentation):
    - ELEVENLABS_API_KEY environment variable (written ``ElevenLabs_API_KEY``
      in the original note; environment variable names are case-sensitive
      on most platforms, so confirm the exact spelling)
    - STREAM_API_KEY and STREAM_API_SECRET environment variables
    - GEMINI_API_KEY environment variable

    The original note also listed DEEPGRAM_API_KEY, but no Deepgram plugin
    is imported or used by this example.
    """
    # Build the components in the same order the keyword arguments were
    # originally evaluated.
    transport = getstream.Edge()
    persona = User(name="Friendly AI", id="agent")
    system_prompt = (
        "You're a general-purpose voice AI assistant. "
        "You can help with a wide range of tasks, from answering questions to "
        "providing information on a wide range of topics. "
        "You can also help with tasks such as setting reminders, creating "
        "to-do lists, and more."
    )
    speech_out = elevenlabs.TTS()  # ElevenLabs for text-to-speech (TTS)
    speech_in = elevenlabs.STT()  # ElevenLabs Scribe v2 Realtime for speech-to-text (STT)
    language_model = gemini.LLM("gemini-2.0-flash")
    turn_detector = smart_turn.TurnDetection()

    agent = Agent(
        edge=transport,
        agent_user=persona,
        instructions=system_prompt,
        tts=speech_out,
        stt=speech_in,
        llm=language_model,
        turn_detection=turn_detector,
    )
    await agent.create_user()

    # Every run gets its own call, identified by a random UUID.
    video_client = agent.edge.client.video
    call = video_client.call("default", str(uuid4()))
    await agent.edge.open_demo(call)

    session = await agent.join(call)
    with session:
        # Short pause after joining before the agent greets the caller.
        await asyncio.sleep(5)
        greeting = (
            "Hello! I'm using ElevenLabs Scribe v2 Realtime for speech-to-text. "
            "How can I help you today?"
        )
        await agent.llm.simple_response(text=greeting)
        await agent.finish()


 if __name__ == "__main__":
    # Script entry point: build the coroutine, then drive it to completion
    # on a fresh event loop.
    agent_session = start_agent()
    asyncio.run(agent_session)
	import asyncio
	import logging
	from uuid import uuid4

	from dotenv import load_dotenv

	from vision_agents.core.edge.types import User
	from vision_agents.core.agents import Agent
	from vision_agents.plugins import getstream, elevenlabs, smart_turn, gemini


	# Pull settings from a local .env file into the process environment before
	# any plugin is constructed.
	load_dotenv()

	# NOTE(review): the format references a `call_id` record attribute, which is
	# not one of the standard LogRecord fields. Presumably the vision_agents
	# package injects it into log records -- confirm.
	logging.basicConfig(
	    level=logging.INFO,
	    format=(
	        "%(asctime)s %(levelname)s [call_id=%(call_id)s] "
	        "%(name)s: %(message)s"
	    ),
	)
	logger = logging.getLogger(__name__)


	async def start_agent() -> None:
	    """Run a demo voice agent wired to ElevenLabs Scribe v2 Realtime STT.

	    This example creates an agent that uses:
	    - ElevenLabs for text-to-speech (TTS)
	    - GetStream for edge/real-time communication
	    - ElevenLabs Scribe v2 Realtime for speech-to-text (STT)
	    - Gemini for language model
	    - smart_turn for turn detection

	    After joining a freshly created call it waits five seconds, sends a
	    greeting through the LLM, and then awaits ``agent.finish()``.

	    Requirements (as listed by the original example; confirm against each
	    plugin's documentation):
	    - ELEVENLABS_API_KEY environment variable (written
	      ``ElevenLabs_API_KEY`` in the original note; environment variable
	      names are case-sensitive on most platforms, so confirm the exact
	      spelling)
	    - STREAM_API_KEY and STREAM_API_SECRET environment variables
	    - GEMINI_API_KEY environment variable

	    The original note also listed DEEPGRAM_API_KEY, but no Deepgram plugin
	    is imported or used by this example.
	    """
	    agent = Agent(
	        edge=getstream.Edge(),
	        agent_user=User(name="Friendly AI", id="agent"),
	        instructions="You're a general-purpose voice AI assistant. You can help with a wide range of tasks, from answering questions to providing information on a wide range of topics. You can also help with tasks such as setting reminders, creating to-do lists, and more.",
	        tts=elevenlabs.TTS(),  # ElevenLabs for text-to-speech (TTS)
	        stt=elevenlabs.STT(),  # ElevenLabs Scribe v2 Realtime for speech-to-text (STT)
	        llm=gemini.LLM("gemini-2.0-flash"),
	        turn_detection=smart_turn.TurnDetection(),
	    )
	    await agent.create_user()

	    # Every run gets its own call of type "default", identified by a random UUID.
	    call = agent.edge.client.video.call("default", str(uuid4()))
	    await agent.edge.open_demo(call)

	    # Everything below must run inside the joined-call context.
	    with await agent.join(call):
	        # Short pause after joining before the agent greets the caller.
	        await asyncio.sleep(5)
	        await agent.llm.simple_response(text="Hello! I'm using ElevenLabs Scribe v2 Realtime for speech-to-text. How can I help you today?")
	        await agent.finish()


	if __name__ == "__main__":
	    # Script entry point: run the agent coroutine to completion on a fresh
	    # event loop. The call must be nested under the guard to be valid Python.
	    asyncio.run(start_agent())
No results found