Last active
March 17, 2025 12:22
-
-
Save alonsoir/c0867b4eb521c2ad050bcff2628680d5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
┌<▸> ~/g/python-samples-2025 | |
└➤ poetry run python src/python_samples_2025/rag_ollama_transcriber.py | |
❌ Ollama no está corriendo. Intentando iniciarlo... | |
⏳ Iniciando Ollama... | |
✅ Ollama está listo. | |
Modelos disponibles en Ollama: | |
1. my-codellama:latest | |
2. codellama:latest | |
3. neural-chat:latest | |
4. llama2:latest | |
5. mistral:latest | |
6. deepseek-r1:latest | |
7. hemanth/cybersecurityspecialist:latest | |
8. jimscard/blackhat-hacker:latest | |
9. llama3:latest | |
Selecciona el número del modelo que deseas usar: 1 | |
Modelo seleccionado: my-codellama:latest | |
🎤 Asistente RAG con Ollama activado. Habla para hacer una pregunta... | |
🎤 Escuchando... | |
🎤 Escuchando... | |
📝 Pregunta transcrita: Gracias por la intervención. | |
🎤 Escuchando... | |
🎤 Escuchando... | |
🎤 Escuchando... | |
🎤 Escuchando... | |
🤖 Respuesta: | |
The answer to the question "Gracias por la intervención" (Thank you for your assistance) is: | |
*My name is Jean and I live in Paris.* | |
📝 Pregunta transcrita: de París. | |
🎤 Escuchando... | |
🤖 Respuesta: | |
The answer to the question "de París" would be "Jean". | |
Error en el procesamiento: run loop already started | |
📝 Pregunta transcrita: ¿Cuál es la capital de París? | |
🎤 Escuchando... | |
🎤 Escuchando... | |
🤖 Respuesta: | |
The answer to the question "¿Cuál es la capital de París?" is "Paris". This is because the first document states that Jean lives in Paris, which means that Paris is the capital of itself. | |
Error en el procesamiento: run loop already started | |
📝 Pregunta transcrita: ¿Cuál es la capital de París? | |
🎤 Escuchando... | |
🤖 Respuesta: The answer to the question "¿Cuál es la capital de París?" is "París". | |
Error en el procesamiento: run loop already started | |
🎤 Escuchando... | |
📝 Pregunta transcrita: Subtítulos realizados por la comunidad de Amara.org | |
🎤 Escuchando... | |
🎤 Escuchando... | |
🤖 Respuesta: | |
The question is asking for the subtitles made by the community of Amara.org. The answer would be the documents provided, which are: | |
* My name is Jean and I live in Paris. | |
* My name is Mark and I live in Berlin. | |
* My name is Giorgio and I live in Rome. | |
Error en el procesamiento: run loop already started | |
📝 Pregunta transcrita: Subtítulos realizados por la comunidad de Amara.org | |
🎤 Escuchando... | |
^CDeteniendo el asistente... | |
┌<▸> ~/g/python-samples-2025 | |
└➤ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from haystack import Pipeline, Document | |
from haystack.utils import Secret | |
from haystack.document_stores.in_memory import InMemoryDocumentStore | |
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever | |
from haystack.components.generators import OpenAIGenerator | |
from haystack.components.builders.prompt_builder import PromptBuilder | |
from dotenv import load_dotenv | |
import os | |
# Load environment variables from .env (provides OPENAI_API_KEY).
load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("No se encontró la API Key. Verifica tu archivo .env")
# Fix: the original used an f-string with no placeholders here.
print("API Key cargada correctamente. ")
# OpenAIGenerator requires the key wrapped in a haystack Secret object,
# not a bare string.
api_key = Secret.from_token(api_key)

# Write documents to InMemoryDocumentStore
document_store = InMemoryDocumentStore()
document_store.write_documents([
    Document(content="My name is Jean and I live in Paris."),
    Document(content="My name is Mark and I live in Berlin."),
    Document(content="My name is Giorgio and I live in Rome.")
])

# Build a RAG pipeline: the Jinja template receives the retrieved
# documents plus the question and renders the LLM prompt.
prompt_template = """
Given these documents, answer the question.
Documents:
{% for doc in documents %}
{{ doc.content }}
{% endfor %}
Question: {{question}}
Answer:
"""

retriever = InMemoryBM25Retriever(document_store=document_store)
prompt_builder = PromptBuilder(template=prompt_template)
llm = OpenAIGenerator(api_key=api_key)

# Wire retriever -> prompt_builder -> llm.
rag_pipeline = Pipeline()
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("llm", llm)
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")

# Ask a question; the same text feeds both the retriever query and the
# prompt's {{question}} slot.
question = "Who lives in Paris?"
results = rag_pipeline.run(
    {
        "retriever": {"query": question},
        "prompt_builder": {"question": question},
    }
)
print(f"question is {question}")
print(results["llm"]["replies"])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
┌<▸> ~/g/python-samples-2025 | |
└➤ poetry run python src/python_samples_2025/haystack-transcriber.py | |
Inicializando sistema RAG... | |
🎤 Asistente RAG activado. Habla para hacer una pregunta... | |
📝 Pregunta transcrita: ¿Quién vive en París? | |
🤖 Respuesta: Jean vive en París. | |
📝 Pregunta transcrita: Yes, sir. Thank you. Thank you. | |
🤖 Respuesta: Based on the documents provided, the answer to the question "Yes, sir. Thank you. Thank you." is unrelated to the information given about Jean, Mark, and Giorgio living in Paris, Berlin, and Rome respectively. | |
Error en el procesamiento RAG: run loop already started | |
Error en el procesamiento: run loop already started | |
📝 Pregunta transcrita: Who lives in Paris? | |
🤖 Respuesta: Jean lives in Paris. | |
Error en el procesamiento RAG: run loop already started | |
Error en el procesamiento: run loop already started | |
^CDeteniendo el asistente... |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PEP 621 project metadata for the python-samples-2025 sample collection,
# managed and built with Poetry.
[project]
name = "python-samples-2025"
version = "0.1.0"
description = ""
authors = [
    {name = "Alonso Isidoro Román",email = "[email protected]"}
]
readme = "README.md"
# NOTE(review): a bare exact version is unusually strict for
# requires-python — confirm a range such as ">=3.10,<3.11" was not intended.
requires-python = "3.10.16"
# Runtime dependencies: haystack-ai for the RAG pipeline, pyaudio /
# sounddevice / soundfile for audio capture, pyttsx3 for text-to-speech,
# openai for Whisper + chat completions, python-dotenv for .env loading.
dependencies = [
    "haystack-ai>=2.11.0,<3.0.0",
    "python-dotenv>=1.0.1,<2.0.0",
    "pyaudio>=0.2.14,<0.3.0",
    "soundfile>=0.13.1,<0.14.0",
    "pyttsx3>=2.98,<3.0",
    "numpy>=1.21.0,<2.0.0",
    "sounddevice (>=0.5.1,<0.6.0)",
    "openai (>=1.66.3,<2.0.0)"
]

# Poetry-specific layout: the importable package lives under src/.
[tool.poetry]
packages = [{include = "python_samples_2025", from = "src"}]

[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyaudio | |
import numpy as np | |
import queue | |
import threading | |
import time | |
import pyttsx3 | |
import openai | |
import tempfile | |
import os | |
import requests | |
import json | |
import soundfile as sf | |
from dotenv import load_dotenv | |
import subprocess | |
# Load environment variables from .env (provides OPENAI_API_KEY for Whisper).
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Ollama configuration: local generation endpoint and the model to query.
OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "llama3"  # Initial value; overwritten by the user's pick in check_ollama()

# Example corpus searched by simple_search().
documents = [
    "My name is Jean and I live in Paris.",
    "My name is Mark and I live in Berlin.",
    "My name is Giorgio and I live in Rome."
]

# Audio capture settings: 16-bit mono PCM at 16 kHz, read in 1024-frame
# chunks; each listening window lasts LISTEN_TIME seconds.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
LISTEN_TIME = 5

# PyAudio handle and the queue connecting the recorder and processor threads.
audio_queue = queue.Queue()
audio = pyaudio.PyAudio()

# Text-to-speech engine used to voice the answers.
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # speaking rate (pyttsx3 'rate' property)
def simple_search(query, documents):
    """Return the documents that share at least one whitespace-separated
    word with the query (case-insensitive).

    Falls back to the full document list when nothing overlaps, so the
    caller always has context to hand to the LLM.
    """
    terms = set(query.lower().split())
    matches = [
        doc for doc in documents
        if terms & set(doc.lower().split())
    ]
    return matches or documents
def generate_response_ollama(query, relevant_docs):
    """Answer *query* with the local Ollama server, grounding the prompt
    on *relevant_docs*.

    Args:
        query: The user's transcribed question.
        relevant_docs: Iterable of document strings cited in the prompt.

    Returns:
        The model's reply text, or a Spanish error message on failure.
    """
    prompt = f"""
Given these documents, answer the question.
Documents:
{chr(10).join(['- ' + doc for doc in relevant_docs])}
Question: {query}
Answer:
"""
    data = {"model": OLLAMA_MODEL, "prompt": prompt, "stream": False}
    try:
        # Fix: the original POST had no timeout and could hang forever if
        # the server stalled. Local generation is slow, so be generous.
        response = requests.post(OLLAMA_URL, json=data, timeout=120)
        response.raise_for_status()
        return response.json().get("response", "Lo siento, no pude generar una respuesta.")
    except requests.exceptions.RequestException as e:
        print(f"Error al llamar a Ollama: {e}")
        return f"Error al comunicarse con Ollama: {e}"
def record_audio():
    """Capture LISTEN_TIME-second windows from the default microphone and
    enqueue each one as a temporary WAV file path on audio_queue.

    Fix: the original's finally block referenced `stream` even when
    audio.open() itself raised, turning the real error into a NameError.
    """
    stream = None  # so the finally block is safe if audio.open() fails
    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK)
        print("🎤 Asistente RAG con Ollama activado. Habla para hacer una pregunta...")
        while True:
            print("🎤 Escuchando...")
            frames = []
            start_time = time.time()
            while time.time() - start_time < LISTEN_TIME:
                data = stream.read(CHUNK, exception_on_overflow=False)
                frames.append(data)
            # Persist the captured PCM as a WAV file for the Whisper API.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
                sf.write(temp_file.name, audio_data, RATE)
                audio_queue.put(temp_file.name)
    except Exception as e:
        print(f"Error en la grabación: {e}")
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        audio.terminate()
def process_audio():
    """Consume WAV paths from audio_queue: transcribe with the OpenAI
    Whisper API, retrieve matching documents, answer through Ollama and
    speak the reply. Runs forever; intended as a daemon-thread target.
    """
    while True:
        audio_file_path = audio_queue.get()
        try:
            if os.path.exists(audio_file_path) and os.path.getsize(audio_file_path) > 0:
                with open(audio_file_path, "rb") as audio_file:
                    transcription = openai.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio_file
                    )
                question = transcription.text.strip()
                os.unlink(audio_file_path)
                if question:
                    print(f"📝 Pregunta transcrita: {question}")
                    relevant_docs = simple_search(question, documents)
                    reply = generate_response_ollama(question, relevant_docs)
                    print(f"🤖 Respuesta: {reply}")
                    # pyttsx3's runAndWait() raises RuntimeError
                    # ("run loop already started") on repeated calls on some
                    # platforms (visible in the session log); don't let a TTS
                    # failure abort the whole iteration.
                    try:
                        engine.say(reply)
                        engine.runAndWait()
                    except RuntimeError as e:
                        print(f"Error de síntesis de voz: {e}")
            else:
                print("Archivo de audio vacío o no existe")
                # Fix: the original leaked zero-byte temp files here.
                if os.path.exists(audio_file_path):
                    os.unlink(audio_file_path)
        except Exception as e:
            print(f"Error en el procesamiento: {e}")
            # Best-effort cleanup; the file may already be gone.
            try:
                os.unlink(audio_file_path)
            except OSError:
                pass
def ensure_ollama_running():
    """Return True when the local Ollama server answers /api/tags,
    otherwise try to launch it via start_ollama().

    Fixes over the original: a non-200 reply fell off the end of the
    function and returned None, and only ConnectionError was caught, so a
    read timeout propagated to the caller.
    """
    try:
        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        if response.status_code == 200:
            print("✅ Ollama ya está corriendo.")
            return True
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        pass
    print("❌ Ollama no está corriendo. Intentando iniciarlo...")
    return start_ollama()
def start_ollama():
    """Launch `ollama serve` in the background and block until it responds.

    Returns:
        bool: True once wait_for_ollama() sees the server answer; False if
        the process could not be spawned (e.g. the ollama binary is not on
        PATH) or anything raised inside the try block.
    """
    try:
        # Fire-and-forget child process; its output is discarded.
        subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        print("⏳ Iniciando Ollama...")
        # NOTE: wait_for_ollama() is intentionally inside the try so any
        # exception it raises is reported as a startup failure.
        return wait_for_ollama()
    except Exception as e:
        print(f"⚠️ Error al iniciar Ollama: {e}")
        print("Asegúrate de que Ollama esté instalado y accesible desde la línea de comandos.")
        return False
def wait_for_ollama():
    """Poll /api/tags roughly once per second until Ollama answers HTTP 200.

    Returns:
        bool: True when the server is ready, False after max_attempts
        failed polls.

    Fixes over the original: only ConnectionError was caught (a read
    timeout crashed the caller), and a non-200 reply looped again without
    sleeping, producing a tight busy-loop.
    """
    max_attempts = 10
    for _ in range(max_attempts):
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=5)
            if response.status_code == 200:
                print("✅ Ollama está listo.")
                return True
        except requests.exceptions.RequestException:
            pass
        time.sleep(1)
    print("❌ No se pudo conectar a Ollama después de varios intentos.")
    return False
def check_ollama():
    """Ensure Ollama is running, list its installed models and let the
    user pick one interactively.

    Side effects: overwrites the global OLLAMA_MODEL with the selection
    and exits the process when Ollama cannot be started.
    """
    global OLLAMA_MODEL
    if not ensure_ollama_running():
        print("❌ No se pudo iniciar Ollama. El programa no puede continuar.")
        exit(1)
    try:
        # Fix: the original GET had no timeout and could hang indefinitely.
        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        if response.status_code == 200:
            models = response.json().get("models", [])
            if models:
                available_models = [model["name"] for model in models]
                print("Modelos disponibles en Ollama:")
                for i, model in enumerate(available_models, 1):
                    print(f"{i}. {model}")
                # Re-prompt until the user enters a valid 1-based index.
                while True:
                    try:
                        choice = input("Selecciona el número del modelo que deseas usar: ")
                        selected_index = int(choice) - 1
                        if 0 <= selected_index < len(available_models):
                            OLLAMA_MODEL = available_models[selected_index]
                            print(f"Modelo seleccionado: {OLLAMA_MODEL}")
                            break
                        else:
                            print("Número inválido. Intenta de nuevo.")
                    except ValueError:
                        print("Por favor, ingresa un número válido.")
            else:
                print("No hay modelos disponibles en Ollama. Instala un modelo con: ollama pull llama3")
        else:
            print("Error al conectar con Ollama")
    except requests.exceptions.RequestException:
        print("⚠️ No se puede conectar a Ollama. Asegúrate de que esté ejecutándose con: ollama serve")
# Make sure Ollama is reachable and let the user pick a model before starting.
check_ollama()

# Producer/consumer pair: record_audio fills audio_queue, process_audio
# drains it. Daemon threads die automatically with the main thread.
record_thread = threading.Thread(target=record_audio, daemon=True)
process_thread = threading.Thread(target=process_audio, daemon=True)
record_thread.start()
process_thread.start()

# Keep the main thread alive until Ctrl-C.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print("Deteniendo el asistente...")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyaudio | |
import numpy as np | |
import queue | |
import threading | |
import time | |
import pyttsx3 | |
import openai | |
import tempfile | |
import os | |
import soundfile as sf | |
from dotenv import load_dotenv | |
# Load environment variables from .env; OPENAI_API_KEY is mandatory here
# because both transcription (Whisper) and generation (chat) use OpenAI.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError("No se encontró la API Key. Verifica tu archivo .env")
print("Inicializando sistema RAG...")

# Example corpus searched by simple_search().
documents = [
    "My name is Jean and I live in Paris.",
    "My name is Mark and I live in Berlin.",
    "My name is Giorgio and I live in Rome."
]

# Audio capture settings: 16-bit mono PCM at 16 kHz, read in 1024-frame chunks.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
LISTEN_TIME = 5  # Seconds of listening before each chunk is processed

# PyAudio handle and the queue connecting the recorder and processor threads.
audio_queue = queue.Queue()
audio = pyaudio.PyAudio()

# Text-to-speech engine used to voice the answers.
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # Speaking rate (pyttsx3 'rate' property)
def simple_search(query, documents):
    """Keyword filter: keep every document that contains any query word
    as a case-insensitive substring.

    Returns the whole document list when no word matches, so the LLM
    always receives some context.
    """
    keywords = query.lower().split()
    hits = [doc for doc in documents if any(kw in doc.lower() for kw in keywords)]
    return hits or documents
def generate_response(query, relevant_docs):
    """Ask gpt-3.5-turbo to answer *query* using only *relevant_docs*.

    Builds a prompt listing the documents as bullet points, sends it with
    a fixed system instruction, and returns the model's reply text.
    """
    # Assemble the RAG prompt: retrieved documents followed by the question.
    doc_lines = chr(10).join(['- ' + doc for doc in relevant_docs])
    prompt = f"""
Given these documents, answer the question.
Documents:
{doc_lines}
Question: {query}
Answer:
"""
    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system",
             "content": "You are a helpful assistant that answers questions based on the provided documents."},
            {"role": "user", "content": prompt},
        ],
    )
    return completion.choices[0].message.content
def record_audio():
    """Continuously capture LISTEN_TIME-second windows from the default
    microphone and enqueue each one as a temporary WAV file path.

    Fix: the original never closed the PyAudio stream; a finally block now
    releases it (guarded, since audio.open() itself may fail).
    """
    stream = None
    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK)
        print("🎤 Asistente RAG activado. Habla para hacer una pregunta...")
        while True:
            frames = []
            start_time = time.time()
            while time.time() - start_time < LISTEN_TIME:
                try:
                    data = stream.read(CHUNK, exception_on_overflow=False)
                    frames.append(data)
                except Exception as e:
                    print(f"Error leyendo audio: {e}")
                    time.sleep(0.1)
            # Persist the captured PCM as a WAV file for the Whisper API.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
                sf.write(temp_file.name, audio_data, RATE)
                audio_queue.put(temp_file.name)
    except Exception as e:
        print(f"Error en la grabación: {e}")
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
def process_audio():
    """Consume WAV paths from audio_queue: transcribe with Whisper, run
    the manual RAG pipeline and speak the reply. Daemon-thread target.
    """
    while True:
        audio_file_path = audio_queue.get()
        try:
            # Only process files that exist and carry data.
            if os.path.exists(audio_file_path) and os.path.getsize(audio_file_path) > 0:
                # Transcribe with the OpenAI Whisper API.
                with open(audio_file_path, "rb") as audio_file:
                    transcription = openai.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio_file
                    )
                question = transcription.text.strip()
                # Remove the temp file as soon as it has been consumed.
                try:
                    os.unlink(audio_file_path)
                except OSError:
                    pass
                if question:
                    print(f"📝 Pregunta transcrita: {question}")
                    try:
                        # 1. Retrieve relevant documents, 2. generate,
                        # 3. speak the answer.
                        relevant_docs = simple_search(question, documents)
                        reply = generate_response(question, relevant_docs)
                        print(f"🤖 Respuesta: {reply}")
                        _speak(reply)
                    except Exception as e:
                        print(f"Error en el procesamiento RAG: {e}")
                        _speak("Lo siento, no pude procesar tu pregunta correctamente.")
            else:
                print("Archivo de audio vacío o no existe")
                # Fix: the original leaked zero-byte temp files here.
                if os.path.exists(audio_file_path):
                    os.unlink(audio_file_path)
        except Exception as e:
            print(f"Error en el procesamiento: {e}")
            # Best-effort cleanup; the file may already be gone.
            try:
                os.unlink(audio_file_path)
            except OSError:
                pass


def _speak(text):
    """Best-effort TTS helper.

    pyttsx3's runAndWait() raises RuntimeError('run loop already started')
    when invoked repeatedly on some platforms — the session log shows the
    original crashing on exactly this, even from its own error handler.
    Log the failure and continue instead of aborting the loop.
    """
    try:
        engine.say(text)
        engine.runAndWait()
    except RuntimeError as e:
        print(f"Error de síntesis de voz: {e}")
# Producer/consumer pair: record_audio fills audio_queue, process_audio
# drains it. Daemon threads die automatically with the main thread.
record_thread = threading.Thread(target=record_audio, daemon=True)
process_thread = threading.Thread(target=process_audio, daemon=True)
record_thread.start()
process_thread.start()

# Keep the main thread alive until Ctrl-C.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print("Deteniendo el asistente...")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment