- OpenAI Compatible Endpoints:
  - Expose RESTful API endpoints compatible with OpenAI’s API for seamless integration (see the sketch after this list).
- Infinite Archival Memory:
  - Use PostgreSQL with pgvector to store and query conversational history.
  - Provide enriched context retrieval for every query.
- Multiple LLM Backends:
  - Integrate both OpenAI and local Ollama LLMs, allowing configuration to switch between them.
- Start Small:
  - Focus on core functionality: receiving a query, retrieving relevant context from memory, and responding using the configured LLM backend.
- Backend Framework:
  - Use FastAPI for the backend to handle requests and expose endpoints.
  - Keep the architecture modular to allow future enhancements like fine-tuning or multi-user support.
- Database:
  - PostgreSQL with pgvector extension for efficient vector-based similarity search.
- Co-Evolutionary Feedback Layer:
  - Introduce a review process for changes suggested by the system itself or by external contributors.
  - Integrate a "Partner Review" phase at every layer to ensure high-level sanity checks before implementing changes.
  - Log decisions made during the review for transparency and traceability.
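As a first taste of what OpenAI compatibility could look like, here is a self-contained sketch of a /v1/chat/completions route built on the same pipeline as the /query endpoint defined later. The route path and response shape follow OpenAI's chat completions format, but the fields returned are a minimal subset, not a complete implementation; the get_relevant_context and query_llm helpers are the ones from memory.py and llm_backend.py below.

```python
# Sketch: an OpenAI-compatible chat completions route on top of Eidolon's pipeline.
# Assumes the memory.py and llm_backend.py modules defined later in this document.
import time
import uuid
from typing import List

from fastapi import FastAPI
from pydantic import BaseModel

from llm_backend import query_llm
from memory import get_relevant_context

app = FastAPI()

class ChatMessage(BaseModel):
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]

@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    # Treat the last message as the user's query and enrich it with archival memory
    user_prompt = request.messages[-1].content
    context = get_relevant_context(user_prompt)
    answer = query_llm(user_prompt, context)
    # Return the answer in OpenAI's chat completion response shape
    return {
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": request.model,
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": answer},
            "finish_reason": "stop",
        }],
    }
```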
- Connect to your PostgreSQL instance: `psql -U postgres`
- Enable the pgvector extension: `CREATE EXTENSION IF NOT EXISTS vector;`
- Verify the installation: `SELECT * FROM pg_available_extensions WHERE name = 'vector';`
- Restart PostgreSQL if necessary: `sudo service postgresql restart`

Run the following SQL commands to create the necessary tables for Eidolon:
```sql
-- Table to store embeddings and related metadata
CREATE TABLE archival_memory (
    id SERIAL PRIMARY KEY,
    content TEXT NOT NULL,
    embedding VECTOR(768) NOT NULL,  -- Adjust the dimension to match your embedding model
    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Index for efficient similarity search
CREATE INDEX archival_memory_embedding_idx ON archival_memory USING ivfflat (embedding vector_cosine_ops);

-- Table to store configuration details
CREATE TABLE config (
    id SERIAL PRIMARY KEY,
    key TEXT NOT NULL,
    value TEXT NOT NULL
);
```
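One practical note on the ivfflat index: it builds its cluster lists from the rows present at creation time, and the pgvector documentation recommends creating it after the table contains some data (or rebuilding it once the archive has grown) to get good recall.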
Ensure you can connect to the database using the environment variable DATABASE_URL. Example:

```bash
export DATABASE_URL=postgresql://user:password@localhost:5432/eidolon
```

Test the connection:

```bash
psql $DATABASE_URL
```
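Since the application talks to the database from Python, a short script along these lines can confirm both the connection and the extension before going further (a minimal sketch, assuming psycopg2 is installed and DATABASE_URL is exported):

```python
# Sanity check: connect with DATABASE_URL and confirm pgvector is installed
import os

import psycopg2

connection = psycopg2.connect(os.environ["DATABASE_URL"])
cursor = connection.cursor()
cursor.execute("SELECT extversion FROM pg_extension WHERE extname = 'vector';")
row = cursor.fetchone()
print(f"pgvector version: {row[0]}" if row else "pgvector is NOT installed")
cursor.close()
connection.close()
```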
To avoid relying on external APIs for embeddings, a local model such as SentenceTransformers or a Hugging Face model can be used. This eliminates network round-trips and keeps conversational data private.
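For instance, here is a minimal sketch of generate_local_embedding backed by SentenceTransformers; all-mpnet-base-v2 is an example choice because it produces 768-dimensional vectors, matching the VECTOR(768) column above:

```python
from sentence_transformers import SentenceTransformer

# all-mpnet-base-v2 outputs 768-dimensional embeddings, matching VECTOR(768).
# Loaded once at import time so repeated calls are cheap.
_model = SentenceTransformer("all-mpnet-base-v2")

def generate_local_embedding(text):
    # encode() returns a numpy array; convert it to a plain list for psycopg2
    return _model.encode(text).tolist()
```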
Alternatively, Ollama supports embedding models. Configure Ollama with an embedding model and ensure it is running locally at http://127.0.0.1:11434, then replace generate_local_embedding in memory.py as follows:
```python
import requests

from eidolon.config import Config

def generate_local_embedding(text):
    # Call the embedding endpoint of the local Ollama server
    url = f"{Config.OLLAMA_API_URL}/api/embeddings"
    # nomic-embed-text is one 768-dimensional embedding model; use whichever
    # embedding model you have pulled into Ollama
    payload = {"model": "nomic-embed-text", "prompt": text}
    try:
        response = requests.post(url, json=payload)
        response.raise_for_status()
        embedding = response.json().get("embedding")
        if not embedding or len(embedding) != 768:
            raise ValueError("Unexpected embedding dimension or missing embedding!")
        return embedding
    except requests.exceptions.RequestException as e:
        print(f"Error generating embedding: {e}")
        return None
```
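Before testing this, make sure the embedding model has actually been pulled into the local Ollama instance (for the example above, `ollama pull nomic-embed-text`); otherwise the endpoint will return an error for the unknown model.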
```
eidolon/
├── app.py            # FastAPI app entry point
├── config.py         # Configuration management
├── llm_backend.py    # LLM backend logic (renamed)
├── memory.py         # Database and memory retrieval logic
├── requirements.txt  # Dependencies
└── README.md         # Documentation
```
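The imports used in the modules below imply a requirements.txt along these lines (left unpinned here as a sketch; pin versions in practice):

```
fastapi
uvicorn
requests
psycopg2-binary
openai
```

With those installed, `uvicorn app:app --port 8080` starts the server on the port used in the curl example at the end of this document.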
app.py:

```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from llm_backend import query_llm
from memory import get_relevant_context, log_query_and_response

app = FastAPI()

# Request body for /query; a bare `user_query: str` parameter would be parsed
# as a URL query parameter rather than a JSON body
class QueryRequest(BaseModel):
    user_query: str

@app.post("/query")
async def query_endpoint(request: QueryRequest):
    try:
        context = get_relevant_context(request.user_query)
        response = query_llm(request.user_query, context)
        log_query_and_response(request.user_query, response)  # Log the query and response
        return {"response": response}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
```
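The endpoint therefore expects a JSON body of the form `{"user_query": "..."}`; the curl example at the end of this document sends exactly that.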
config.py:

```python
import os

class Config:
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://127.0.0.1:11434")
    DB_URL = os.getenv("DATABASE_URL", "postgresql://user:password@localhost:5432/eidolon")
    ACTIVE_LLM = os.getenv("ACTIVE_LLM", "openai")  # Options: "openai", "ollama"
```
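Switching backends is then purely an environment change, e.g. `export ACTIVE_LLM=ollama` before starting the server, with no code changes required.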
llm_backend.py:

```python
import requests

from config import Config

# Query OpenAI LLM
def query_openai(prompt, context):
    from openai import OpenAI  # openai>=1.0 client interface
    client = OpenAI(api_key=Config.OPENAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": context}, {"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

# Query Ollama LLM via its /api/generate endpoint
def query_ollama(prompt, context):
    url = Config.OLLAMA_API_URL + "/api/generate"
    # "llama3" is a placeholder; use whichever chat model you have pulled into Ollama.
    # stream=False makes Ollama return a single JSON object instead of a stream.
    payload = {"model": "llama3", "prompt": f"{context}\n{prompt}", "stream": False}
    response = requests.post(url, json=payload)
    return response.json().get("response", "Error: No response from Ollama")

# Main LLM query function: dispatch to the configured backend
def query_llm(prompt, context):
    if Config.ACTIVE_LLM == "openai":
        return query_openai(prompt, context)
    elif Config.ACTIVE_LLM == "ollama":
        return query_ollama(prompt, context)
    else:
        raise ValueError("Invalid LLM backend configured")
```
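This keeps the dispatch in one place: supporting another backend later means adding one more query_* function and one more branch in query_llm, in line with the modularity goal stated above.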
memory.py:

```python
import psycopg2
from psycopg2.extras import RealDictCursor

from config import Config

# Note: generate_local_embedding (shown earlier) lives in this module as well.

def get_db_connection():
    return psycopg2.connect(Config.DB_URL, cursor_factory=RealDictCursor)

def get_relevant_context(query):
    # Generate the query embedding before touching the database
    query_embedding = generate_local_embedding(query)
    if query_embedding is None:
        return "No relevant context available"
    connection = get_db_connection()
    cursor = connection.cursor()
    # <=> is cosine distance, matching the vector_cosine_ops index created
    # earlier; embeddings are passed in pgvector's '[x, y, ...]' text form
    cursor.execute(
        """
        SELECT content, timestamp
        FROM archival_memory
        ORDER BY embedding <=> %s::vector ASC
        LIMIT 5;
        """, (str(query_embedding),)
    )
    results = cursor.fetchall()
    cursor.close()
    connection.close()
    return "\n".join([f"[{row['timestamp']}] {row['content']}" for row in results])

def log_query_and_response(query, response):
    connection = get_db_connection()
    cursor = connection.cursor()
    # Store both sides of the exchange; skip entries whose embedding failed,
    # since the embedding column is NOT NULL
    for content in (query, response):
        embedding = generate_local_embedding(content)
        if embedding is None:
            continue
        cursor.execute(
            """
            INSERT INTO archival_memory (content, embedding)
            VALUES (%s, %s::vector);
            """, (content, str(embedding))
        )
    connection.commit()
    cursor.close()
    connection.close()
```
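Note that the inner-product operator <#> would bypass the vector_cosine_ops index, which only accelerates the cosine-distance operator <=>; keeping the two aligned is what makes the LIMIT 5 lookup fast as the archive grows.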
To query the API and parse the response with jq, use the following command:

```bash
curl -s -X POST "http://127.0.0.1:8080/query" \
  -H "Content-Type: application/json" \
  -d '{"user_query": "Hello, Eidolon"}' | jq .response
```