Codebase Chat CLI - A command-line interface for interacting with codebases using local LLMs via Ollama.
#!/usr/bin/env python3
"""
Codebase Chat CLI - GPU Accelerated
A command-line interface for interacting with codebases using local LLMs via Ollama.
Supports GPU acceleration for improved performance and ChromaDB for vector indexing.
Features:
- Vector index creation of source code files with ChromaDB and Ollama embeddings
- .codechatignore support for excluding files/folders
- Interactive querying of indexed codebases
- GPU and Apple Silicon acceleration (CUDA/MPS) for embeddings and chat
- Project management capabilities (indexing, analysis, listing)
- Multi-language support (Java, Kotlin, Python, JS, TS, Go, Rust, C++, etc.)
- Dry-run mode for previewing indexing operations
Environment Variables:
- OLLAMA_MODEL: Default chat model (e.g., "phi4:14b")
- OLLAMA_EMBED_MODEL: Embedding model (e.g., "nomic-embed-text")
- OLLAMA_URL: Ollama API endpoint (default: http://localhost:11434)
- INDEX_ROOT: Root directory for storing vector indexes
"""
import os
import sys
import argparse
import shutil
import time
import re
import chromadb
import torch
from pathlib import Path
from typing import Optional, List, Dict, Any
from dotenv import load_dotenv
from pathspec import PathSpec
from packaging import version
# Enhanced LlamaIndex imports
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.core.settings import Settings
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.prompts import PromptTemplate
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.ollama import Ollama
# --- Configuration ---
load_dotenv()
DEFAULT_MODEL = os.getenv("OLLAMA_MODEL", "phi4:14b")
#DEFAULT_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "nomic-embed-text")
DEFAULT_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", "all-minilm")
DEFAULT_OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
INDEX_ROOT = os.getenv("INDEX_ROOT", os.path.expanduser("~/.codechat/indexes"))
ALLOWED_EXTENSIONS = {".java", ".kt", ".py", ".js", ".ts", ".go", ".rs", ".cpp", ".h", ".xml", ".properties", ".yml", ".md"}
# Quality Improvement Defaults
DEFAULT_CHUNK_SIZE = 512
DEFAULT_CHUNK_OVERLAP = 128
DEFAULT_SIMILARITY_TOP_K = 3
# Enhanced QA Prompt
CODE_QA_PROMPT = PromptTemplate("""
You are a senior developer analyzing this codebase. Provide:
1. Concise technical explanation
2. Relevant code snippets with source file references
3. Usage examples when appropriate
4. Any potential issues or caveats
Format your response in markdown with proper code blocks.
Context: {context_str}
Question: {query_str}
Answer:
""")
# Timeout settings (seconds)
DEFAULT_TIMEOUT = 60
MAX_RETRIES = 2
# Minimum required versions
MIN_CHROMADB_VERSION = "0.4.0"
MIN_TORCH_VERSION = "1.10.0"
def validate_project_name(name: str) -> bool:
"""
Validates a project name to ensure it is safe for use as a filesystem directory name.
Args:
name (str): The project name to validate.
Returns:
bool: True if the name is valid (contains only letters, numbers, underscores, or hyphens), False otherwise.
"""
if not name:
return False
return bool(re.match(r'^[a-zA-Z0-9_-]+$', name))
def check_dependencies() -> None:
"""
Checks the versions of required dependencies and prints warnings if they
do not meet the minimum required versions.
"""
try:
chroma_version = version.parse(chromadb.__version__)
if chroma_version < version.parse(MIN_CHROMADB_VERSION):
print(f"⚠️ ChromaDB version {chromadb.__version__} is below minimum required {MIN_CHROMADB_VERSION}")
torch_version = version.parse(torch.__version__)
if torch_version < version.parse(MIN_TORCH_VERSION):
print(f"⚠️ PyTorch version {torch.__version__} is below minimum required {MIN_TORCH_VERSION}")
except Exception as e:
print(f"⚠️ Could not verify dependency versions: {str(e)}")
def get_device(force_cpu: bool = False) -> str:
"""
Determines the most suitable compute device for processing.
Args:
force_cpu (bool): If True, always return 'cpu' regardless of available hardware.
Returns:
str: The device to use ('cuda', 'mps', or 'cpu').
"""
if not force_cpu and torch.cuda.is_available():
return "cuda"
elif not force_cpu and hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
return "mps" # Apple Silicon
return "cpu"
def should_index_file(path: Path) -> bool:
"""
Checks whether a given file should be indexed based on its file extension.
Args:
path (Path): The file path to check.
Returns:
bool: True if the file extension is supported; False otherwise.
"""
return path.suffix.lower() in ALLOWED_EXTENSIONS
def gather_files(
codebase_path: Path,
verbose: bool = False,
ignore_file_path: Optional[Path] = None
) -> List[str]:
"""
Recursively collects file paths from a codebase directory, applying .codechatignore patterns if present.
Args:
codebase_path (Path): Root directory of the codebase.
verbose (bool, optional): Enables detailed output during file collection. Defaults to False.
ignore_file_path (Optional[Path], optional): Custom path to a .codechatignore file.
If None, looks for .codechatignore in default locations. Defaults to None.
Returns:
List[str]: A list of string paths to source files eligible for indexing.
"""
# Look for ignore files in priority order
possible_ignore_files = []
if ignore_file_path:
possible_ignore_files.append(ignore_file_path)
possible_ignore_files.extend([
Path.cwd() / ".codechatignore",
codebase_path / ".codechatignore"
])
spec = None
for ignore_file in possible_ignore_files:
if ignore_file.exists():
if verbose:
print(f"🔍 Found .codechatignore at {ignore_file}")
with ignore_file.open("r", encoding="utf-8") as f:
patterns = [line.strip() for line in f if line.strip() and not line.startswith("#")]
if verbose and patterns:
print(f"📜 Ignore patterns: {patterns}")
spec = PathSpec.from_lines("gitwildmatch", patterns)
break
files = []
for p in codebase_path.rglob("*"):
if not p.is_file():
continue
if not should_index_file(p):
if verbose:
print(f"➖ Skipping (extension): {p}")
continue
try:
rel_path = p.relative_to(codebase_path).as_posix()
if verbose:
print(f"🔄 Testing path: {rel_path}")
except ValueError:
if verbose:
print(f"⚠️ Path error: {p}")
continue
if spec and spec.match_file(rel_path):
if verbose:
print(f"🚫 Excluded by pattern: {rel_path}")
continue
files.append(str(p))
if verbose:
print(f"✅ Added: {p}")
return files
def verify_metadata(index: VectorStoreIndex) -> bool:
"""
Verifies that metadata is present for each node in the index.
Args:
index (VectorStoreIndex): VectorStoreIndex instance to verify.
Returns:
bool: True if all nodes contain source metadata; False otherwise.
"""
for node_id, node in index.docstore.docs.items():
if not node.metadata.get('source_file'):
print(f"⚠️ Missing source_file in node {node_id}")
return False
return True
def build_index(
project: str,
codebase_path: Path,
embed_model: str,
device: str,
clean: bool = False,
dry_run: bool = False,
verbose: bool = False,
ignore_file_path: Optional[Path] = None,
chunk_size: int = DEFAULT_CHUNK_SIZE,
chunk_overlap: int = DEFAULT_CHUNK_OVERLAP
) -> None:
"""
Builds a ChromaDB-based vector index for the specified project.
Args:
project (str): Project name for indexing.
codebase_path (Path): Path to the source code directory.
embed_model (str): The embedding model name for document vectorization.
device (str): Compute device identifier (e.g., 'cuda', 'cpu', 'mps').
clean (bool, optional): If True, deletes and rebuilds the index. Defaults to False.
dry_run (bool, optional): If True, only simulates the indexing process. Defaults to False.
verbose (bool, optional): Enables debug output. Defaults to False.
ignore_file_path (Optional[Path], optional): Custom .codechatignore path. Defaults to None.
chunk_size (int, optional): Maximum token chunk size for embedding. Defaults to DEFAULT_CHUNK_SIZE.
chunk_overlap (int, optional): Overlap between chunks. Defaults to DEFAULT_CHUNK_OVERLAP.
Raises:
SystemExit: If no indexable files are found.
"""
project_index_path = Path(INDEX_ROOT) / project
if clean:
shutil.rmtree(project_index_path, ignore_errors=True)
os.makedirs(project_index_path, exist_ok=True)
indexed_files = gather_files(codebase_path, verbose, ignore_file_path)
if not indexed_files:
print("❌ No indexable files found.")
sys.exit(1)
if dry_run:
print(f"✅ Dry run complete (would index {len(indexed_files)} files)")
return
# Document processing
node_parser = TokenTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
separator="\n"
)
documents = SimpleDirectoryReader(
input_files=indexed_files,
file_metadata=lambda x: {
'source_file': str(Path(x).absolute()),
'file_path': str(Path(x).relative_to(codebase_path)),
'file_name': Path(x).name,
'timestamp': time.time()
}
).load_data()
# Configure Settings instead of ServiceContext
Settings.llm = Ollama(model=DEFAULT_MODEL, base_url=DEFAULT_OLLAMA_URL)
Settings.embed_model = OllamaEmbedding(model_name=embed_model, base_url=DEFAULT_OLLAMA_URL, device=device)
Settings.node_parser = node_parser
Settings.chunk_size = chunk_size
Settings.chunk_overlap = chunk_overlap
# Create index
chroma_client = chromadb.PersistentClient(path=str(project_index_path))
vector_store = ChromaVectorStore(chroma_collection=chroma_client.get_or_create_collection(f"{project}_collection"))
index = VectorStoreIndex.from_documents(
documents,
storage_context=StorageContext.from_defaults(vector_store=vector_store),
show_progress=verbose
)
index.storage_context.persist()
if not verify_metadata(index):
print("❌ Metadata issues detected - some sources may show as Unknown")
print(f"\n✅ Index built with {len(indexed_files)} files (chunk size: {chunk_size}, overlap: {chunk_overlap})")
def chat(
project: str,
model: str,
embed_model: str,
temperature: float,
num_ctx: int,
top_p: float,
repeat_penalty: float,
device: str,
verbose: bool = False,
similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K,
timeout: int = DEFAULT_TIMEOUT,
max_retries: int = MAX_RETRIES,
chunk_size: int = DEFAULT_CHUNK_SIZE,
chunk_overlap: int = DEFAULT_CHUNK_OVERLAP
) -> None:
"""
Start an interactive chat session with an indexed codebase.
Args:
project (str): Name of the project to chat with.
model (str): Ollama model name to use for chat.
embed_model (str): Ollama embedding model name.
temperature (float): Controls randomness of responses (0.0-1.0).
num_ctx (int): Context window size in tokens.
top_p (float): Top-p sampling parameter.
repeat_penalty (float): Penalty for repeated tokens.
device (str): Compute device to use ('cuda', 'mps', or 'cpu').
verbose (bool, optional): If True, prints detailed progress information. Defaults to False.
similarity_top_k (int, optional): Number of similar chunks to retrieve. Defaults to DEFAULT_SIMILARITY_TOP_K.
timeout (int, optional): Query timeout in seconds. Defaults to DEFAULT_TIMEOUT.
max_retries (int, optional): Number of retry attempts on timeout. Defaults to MAX_RETRIES.
chunk_size (int, optional): Text chunk size for processing. Defaults to DEFAULT_CHUNK_SIZE.
chunk_overlap (int, optional): Context overlap between chunks. Defaults to DEFAULT_CHUNK_OVERLAP.
Raises:
SystemExit: If no index is found for the specified project.
"""
project_index_path = Path(INDEX_ROOT) / project
if not project_index_path.exists():
print(f"❌ No index found for '{project}'. Run with --reindex first.")
sys.exit(1)
# Configure for quality responses
Settings.embed_model = OllamaEmbedding(
model_name=embed_model,
base_url=DEFAULT_OLLAMA_URL,
device=device
)
Settings.llm = Ollama(
model=model,
base_url=DEFAULT_OLLAMA_URL,
temperature=temperature,
num_ctx=num_ctx,
top_p=top_p,
repeat_penalty=repeat_penalty,
device=device,
request_timeout=timeout
)
# Quality-optimized query engine
chroma_client = chromadb.PersistentClient(path=str(project_index_path))
vector_store = ChromaVectorStore(chroma_collection=chroma_client.get_collection(f"{project}_collection"))
index = VectorStoreIndex.from_vector_store(vector_store)
query_engine = index.as_query_engine(
similarity_top_k=similarity_top_k,
include_metadata=True,
metadata_fields=['source_file', 'file_name', 'file_path'],
vector_store_query_mode="hybrid",
response_mode="tree_summarize",
text_qa_template=CODE_QA_PROMPT,
verbose=verbose,
timeout=timeout,
retry_on_timeout=True,
max_retries=max_retries
)
sample_embedding = Settings.embed_model.get_text_embedding("sample code class")
print(f"Embedding dimension: {len(sample_embedding)}")
def debug_index_metadata(index: VectorStoreIndex, verbose: bool = True) -> None:
"""Debug function to check what metadata exists in the index.
Args:
index (VectorStoreIndex): The index to debug.
verbose (bool, optional): If True, prints detailed information. Defaults to True.
"""
if not verbose:
return
print("\n🔍 Debugging index metadata:")
try:
collection = index._vector_store._collection
print(f"Collection name: {collection.name}")
print(f"Total vectors: {collection.count()}")
# Get sample items with metadata
items = collection.get(limit=3, include=["metadatas", "documents"])
if items and "metadatas" in items:
print("\nSample metadata found:")
for i, (meta, doc) in enumerate(zip(items["metadatas"], items["documents"][:3])):
print(f" {i + 1}. Metadata: {meta}")
print(f" First 50 chars: {doc[:50]}...\n")
else:
print("⚠️ No metadata found in collection")
except Exception as e:
print(f"⚠️ Error checking metadata: {str(e)}")
print("Trying alternative access method...")
try:
# Alternative way to check nodes
nodes = index.docstore.docs
print(f"\nFound {len(nodes)} nodes in docstore")
for node_id, node in list(nodes.items())[:3]:
print(f"Node {node_id}:")
print(f" Metadata: {node.metadata}")
print(f" Text: {node.text[:50]}...\n")
except Exception as e2:
print(f"⚠️ Couldn't access docstore either: {str(e2)}")
print("\n🔎 Verifying index structure...")
debug_index_metadata(index, verbose=True)
# Additional verification
print("\n🔍 Index Verification:")
try:
print(f"- Vectors: {index._vector_store._collection.count()}")
if hasattr(index, 'docstore'):
print(f"- Documents: {len(index.docstore.docs)}")
else:
print("- Docstore: Not available (normal for ChromaDB)")
except Exception as e:
print(f"⚠️ Verification note: {str(e)}")
# Response enhancement functions
def enhance_query(query: str) -> str:
"""Add context based on query type to get better responses.
Args:
query (str): The original user query.
Returns:
str: The enhanced query with additional context.
"""
query = query.strip()
lower_query = query.lower()
# Module/package queries
if "module" in query or "package" in query:
return ("List all Java modules/packages with their relative paths, "
"main classes, and 1-2 sentence descriptions. "
"Include the module's purpose and key features.")
# Explanation queries
elif any(q_word in lower_query for q_word in ["how", "why", "explain"]):
return f"{query} (provide detailed explanation with code references)"
# Example queries
elif "example" in lower_query:
return f"{query} (include practical usage examples)"
# Default case - return original query
return query
def format_response(response: Any) -> str:
"""Formats the response with source references.
Args:
response (Any): The query response object.
Returns:
str: The formatted response text with sources.
"""
text = response.response
# Source nodes handling
if hasattr(response, 'source_nodes') and response.source_nodes:
sources = []
for node in response.source_nodes[:3]: # Show top 3 sources
source = node.metadata.get('source_file') or node.metadata.get('file_path', 'Unknown')
if source != 'Unknown':
try:
# First try making it relative to INDEX_ROOT
source = str(Path(source).relative_to(INDEX_ROOT))
except ValueError:
try:
# If that fails, just show the filename
source = Path(source).name
except Exception:
source = "Unknown path"
sources.append(f"- {source} (score: {node.score:.2f})")
text += "\n\n🔍 Sources:\n" + "\n".join(sources)
return text
# Interactive chat loop
print(f"\n💬 Chatting with {project} (Enhanced Mode)")
print("Type 'exit' or press Ctrl+C to quit\n")
# Show optimization tips if settings might cause performance issues
optimization_params = {
'timeout': timeout,
'chunk_size': chunk_size,
'chunk_overlap': chunk_overlap,
'similarity_top_k': similarity_top_k,
'model': model
}
tips = get_optimization_tips(optimization_params)
if tips:
print("\n💡 Performance Tips:")
for tip in tips:
print(f" - {tip}")
print()
while True:
try:
question = input("🤖 > ").strip()
if question.lower() in {"exit", "quit"}:
break
start_time = time.time()
try:
response = query_engine.query(enhance_query(question))
print(f"\n{format_response(response)}")
# DEBUG: Show raw source nodes
if hasattr(response, 'source_nodes'):
print("\n🔍 DEBUG - Source Nodes:")
for i, node in enumerate(response.source_nodes[:3]):
print(f"Node {i + 1}:")
print(f" Score: {node.score}")
try:
print(f" Path: {node.metadata.get('file_path')}")
print(f" Source: {node.metadata.get('source_file')}")
except Exception as e:
print(f" Metadata error: {str(e)}")
print(f" Text: {node.text[:100]}...")
except Exception as e:
if "timeout" in str(e).lower():
print("\n⏱️ The query timed out. Try:")
print("- Asking a more specific question")
print(f"- Increasing timeout (current: {timeout}s)")
print(f"- Reducing chunk size (current: {chunk_size})")
else:
print(f"\n❌ Query Error: {str(e)}")
print(f"\n⏱️ Response time: {time.time() - start_time:.2f}s")
except KeyboardInterrupt:
print("\n👋 Exiting...")
break
def list_projects(verbose: bool = False) -> None:
"""
Display all indexed projects with accurate status.
Args:
verbose (bool, optional): If True, shows additional details about each project. Defaults to False.
"""
index_root_path = Path(INDEX_ROOT)
if not index_root_path.exists():
print("No projects indexed yet.")
return
print("📂 Indexed Projects:")
for project_dir in sorted(index_root_path.iterdir()):
if project_dir.is_dir():
status = "❌"
size_info = "unknown"
try:
client = chromadb.PersistentClient(path=str(project_dir))
collections = client.list_collections()
if collections:
# Find matching collection
for col in collections:
if col.name == project_dir.name or col.name == f"{project_dir.name}_collection":
count = col.count()
size_info = f"{count} vectors"
status = "✅"
break
except Exception as e:
if verbose:
print(f"⚠️ Error checking {project_dir.name}: {str(e)}")
print(f" - {project_dir.name} {status} ({size_info})")
def show_config(args: argparse.Namespace) -> None:
"""
Display the current configuration including hardware and model settings.
Args:
args (argparse.Namespace): Parsed command-line arguments.
"""
device = get_device(force_cpu=args.cpu)
gpu_type = "None"
if device == "cuda":
gpu_type = torch.cuda.get_device_name(0)
elif device == "mps":
gpu_type = "Apple Silicon (MPS)"
print("⚙️ Current Configuration:")
print(f" Project: {args.project if hasattr(args, 'project') else 'N/A'}")
print(f" Model: {args.model}")
print(f" Embed Model: {args.embed_model}")
print(f" Device: {device.upper()} ({gpu_type})")
print(f" Temperature: {args.temperature}")
print(f" Context Window: {args.num_ctx} tokens")
print("\n🛠️ Paths:")
print(f" Index Root: {INDEX_ROOT}")
print(f" Ollama URL: {DEFAULT_OLLAMA_URL}")
# Show ignore file info if available
ignore_locations = [
Path(args.ignore_file) if hasattr(args, 'ignore_file') and args.ignore_file else None,
Path.cwd() / ".codechatignore",
Path(args.reindex) / ".codechatignore" if hasattr(args, 'reindex') and args.reindex else None
]
found = False
for loc in ignore_locations:
if loc and loc.exists():
print(f"\n🔍 Active .codechatignore at: {loc}")
with open(loc, 'r') as f:
print(" Ignore Patterns:")
for line in f:
line = line.strip()
if line and not line.startswith("#"):
print(f" - {line}")
found = True
break
if not found:
print("\n⚠️ No .codechatignore file found")
def analyze_project(project: str, verbose: bool = False) -> None:
"""
Display detailed analytics about an indexed project.
Args:
project (str): Name of the project to analyze.
verbose (bool, optional): If True, shows additional storage details. Defaults to False.
Raises:
None: This function handles errors gracefully and prints messages instead of raising exceptions.
"""
project_path = Path(INDEX_ROOT) / project
if not project_path.exists():
print(f"❌ Project '{project}' not found")
return
print(f"\n📊 Analysis for '{project}':")
print("─" * 50)
# 1. Enhanced ChromaDB Stats
try:
client = chromadb.PersistentClient(path=str(project_path))
collection = client.get_collection(f"{project}_collection")
# Count vectors and their distribution
count = collection.count()
metadata = collection.get(include=["metadatas"])
file_types = {}
file_sizes = {}
if metadata and "metadatas" in metadata:
for item in metadata["metadatas"]:
if item and isinstance(item, dict) and "file_path" in item:
try:
ext = Path(item["file_path"]).suffix.lower()
file_types[ext] = file_types.get(ext, 0) + 1
# Get file size if available
if "file_size" in item:
file_sizes[ext] = file_sizes.get(ext, 0) + int(item["file_size"])
except (TypeError, AttributeError) as e:
if verbose:
print(f"⚠️ Could not process metadata item: {str(e)}")
continue
print("\n📈 Embedding Statistics:")
print(f" - Total vectors: {count}")
if file_types:
print(" - File type distribution:")
for ext, num in sorted(file_types.items(), key=lambda x: x[1], reverse=True):
size_info = ""
if ext in file_sizes:
size_info = f" ({file_sizes[ext] / 1024:.1f} KB total)"
print(f" - {ext if ext else 'no-extension'}: {num} vectors{size_info}")
except Exception as e:
print(f"⚠️ Couldn't read ChromaDB collection: {str(e)}")
if "truth value of an array" in str(e):
print("💡 Try upgrading ChromaDB: pip install --upgrade chromadb numpy")
# 2. Storage Analysis
try:
total_size = sum(f.stat().st_size for f in project_path.glob('**/*') if f.is_file())
print("\n💾 Storage Usage:")
print(f" - Index size: {total_size / 1024 / 1024:.2f} MB")
print(f" - Files: {len(list(project_path.glob('**/*')))}")
if verbose:
print("\n🔍 Detailed Storage Breakdown:")
for item in project_path.iterdir():
if item.is_file():
print(f" - {item.name}: {item.stat().st_size / 1024:.1f} KB")
elif item.is_dir():
dir_size = sum(f.stat().st_size for f in item.glob('**/*') if f.is_file())
print(f" - {item.name}/: {dir_size / 1024:.1f} KB")
except Exception as e:
print(f"⚠️ Couldn't analyze storage: {str(e)}")
# 3. Health Check - Updated for ChromaDB v0.4+ format
print("\n🩺 Health Check:")
healthy = True
# Required files for ChromaDB v0.4+
required_files = {
"chroma.sqlite3": "SQLite database",
}
# Optional files
optional_files = {
"chroma_settings.json": "Settings file",
"chroma-embeddings.parquet": "Embeddings data (legacy)"
}
# Check required files
for file, desc in required_files.items():
if (project_path / file).exists():
print(f" - ✅ {desc} present")
else:
print(f" - ❌ {desc} missing!")
healthy = False
# Check optional files
for file, desc in optional_files.items():
if (project_path / file).exists():
print(f" - ☑️ {desc} present")
else:
print(f" - ⚠️ {desc} not found (optional)")
# Check collection exists and is accessible
try:
client = chromadb.PersistentClient(path=str(project_path))
collection = client.get_collection(f"{project}_collection")
print(f" - ✅ Collection accessible ({collection.count()} vectors)")
except Exception as e:
print(f" - ❌ Collection error: {str(e)}")
healthy = False
print(f"\n{'✅ Index is healthy' if healthy else '❌ Index has issues!'}")
print("─" * 50)
def repair_project(project: str, verbose: bool = False) -> None:
"""
Attempt to repair a potentially corrupted index.
Args:
project (str): Name of the project to repair.
verbose (bool, optional): If True, shows additional repair details. Defaults to False.
"""
project_path = Path(INDEX_ROOT) / project
if not project_path.exists():
print(f"❌ Project directory '{project}' not found")
return
print(f"\n🔧 Repairing project '{project}'...")
try:
client = chromadb.PersistentClient(path=str(project_path))
# ChromaDB uses different collection naming in newer versions
collections = client.list_collections()
if not collections:
raise ValueError("No collections found in project directory")
# Try both naming conventions
collection_name = None
for col in collections:
if col.name == project or col.name == f"{project}_collection":
collection_name = col.name
break
if not collection_name:
raise ValueError(f"No matching collection found (tried: '{project}', '{project}_collection')")
if verbose:
print(f"🔄 Found collection: {collection_name}")
collection = client.get_collection(collection_name)
count = collection.count()
print(f"\n✅ Repair successful - project is healthy")
print(f" Collection: {collection_name}")
print(f" Total vectors: {count}")
except Exception as e:
print(f"\n❌ Repair failed: {str(e)}")
print("\nRecommended solutions:")
print(f"1. Clean reindex: --project {project} --reindex /path/to/code --clean")
print(f"2. Manual repair steps:")
print(f" - Delete directory: {project_path}")
print(f" - Check collection name in: {project_path}/chroma.sqlite3")
def get_optimization_tips(params: Dict[str, Any]) -> List[str]:
"""Generate performance optimization suggestions based on current parameters.
Args:
params (Dict[str, Any]): Dictionary of current configuration parameters.
Returns:
List[str]: List of optimization tips.
"""
tips = []
# Timeout-related tips
if params['timeout'] < 30:
tips.append(f"Increase timeout (current: {params['timeout']}s)")
# Chunking-related tips
if params['chunk_size'] > 768:
tips.append(f"Reduce chunk size (current: {params['chunk_size']})")
if params['chunk_overlap'] > 128:
tips.append(f"Reduce chunk overlap (current: {params['chunk_overlap']})")
# Retrieval-related tips
if params['similarity_top_k'] > 3:
tips.append(f"Reduce retrieved chunks (current: {params['similarity_top_k']})")
# Model-related tips
if "34b" in params['model'] or "70b" in params['model']:
tips.append(f"Try smaller model (current: {params['model']})")
return tips
def main():
"""Entry point for the Codebase Chat CLI application.
Handles command-line arguments and orchestrates the main application flow including:
- Dependency checks
- Project management (listing, analyzing, repairing)
- Indexing operations
- Chat functionality
Command Line Arguments:
--project PROJECT_NAME : Specifies project to operate on (for chat/reindex/repair)
--list-projects : Lists all indexed projects
--show-config : Displays current configuration
--repair PROJECT : Attempts to repair a corrupted index
--reindex PATH : Path to codebase to index
--analyze : Shows detailed project analysis
--model MODEL_NAME : Specifies Ollama model to use (default: DEFAULT_MODEL)
--embed-model EMBED_MODEL : Specifies Ollama embedding model (default: DEFAULT_EMBED_MODEL)
--cpu : Forces CPU mode
--gpu : Forces GPU mode if available
--temperature FLOAT : Sets model temperature (default: 0.0)
--num-ctx INT : Sets context window size (default: 8192)
--top-p FLOAT : Sets top-p sampling value (default: 1.0)
--repeat-penalty FLOAT : Sets repetition penalty (default: 1.0)
--clean : Deletes and recreates the index
--dry-run : Only lists files to be indexed
--verbose : Shows detailed debug output
--ignore-file PATH : Path to custom .codechatignore file
--chunk-size INT : Text chunk size for processing (default: DEFAULT_CHUNK_SIZE)
--chunk-overlap INT : Context overlap between chunks (default: DEFAULT_CHUNK_OVERLAP)
--similarity-top-k INT : Number of similar chunks to retrieve (default: DEFAULT_SIMILARITY_TOP_K)
--timeout INT : Query timeout in seconds (default: DEFAULT_TIMEOUT)
--max-retries INT : Number of retry attempts on timeout (default: MAX_RETRIES)
"""
# Check dependencies first
check_dependencies()
parser = argparse.ArgumentParser(
description="Quality-Enhanced Codebase Chat CLI",
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
# Create mutually exclusive group for main actions
action_group = parser.add_mutually_exclusive_group(required=True)
action_group.add_argument("--project", help="Project name (for chat/reindex/repair)")
action_group.add_argument("--list-projects", action="store_true",
help="List all indexed projects")
action_group.add_argument("--show-config", action="store_true",
help="Show current configuration")
action_group.add_argument("--repair", metavar="PROJECT",
help="Attempt to repair a corrupted index")
# Project-specific arguments
parser.add_argument("--reindex", metavar="PATH", help="Path to codebase to index")
parser.add_argument("--analyze", action="store_true",
help="Show detailed project analysis")
# Model settings
parser.add_argument("--model", default=DEFAULT_MODEL, help="Ollama model name")
parser.add_argument("--embed-model", default=DEFAULT_EMBED_MODEL,
help=f"Ollama embedding model (default: {DEFAULT_EMBED_MODEL})")
# Hardware control
parser.add_argument("--cpu", action="store_true", help="Force CPU mode")
parser.add_argument("--gpu", action="store_true", help="Force GPU mode if available")
# Performance tuning
parser.add_argument("--temperature", type=float,
default=float(os.getenv("OLLAMA_TEMPERATURE", 0.0)),
help="Model temperature")
parser.add_argument("--num-ctx", type=int,
default=int(os.getenv("OLLAMA_NUM_CTX", 8192)),
help="Context window size")
parser.add_argument("--top-p", type=float,
default=float(os.getenv("OLLAMA_TOP_P", 1.0)),
help="Top-p sampling")
parser.add_argument("--repeat-penalty", type=float,
default=float(os.getenv("OLLAMA_REPEAT_PENALTY", 1.0)),
help="Repetition penalty")
# Utility flags
parser.add_argument("--clean", action="store_true",
help="Delete and recreate the index")
parser.add_argument("--dry-run", action="store_true",
help="Only list files to be indexed")
parser.add_argument("--verbose", action="store_true",
help="Show detailed debug output")
parser.add_argument("--ignore-file",
help="Path to custom .codechatignore file")
# Add quality parameters
parser.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE,
help="Text chunk size for processing")
parser.add_argument("--chunk-overlap", type=int, default=DEFAULT_CHUNK_OVERLAP,
help="Context overlap between chunks")
parser.add_argument("--similarity-top-k", type=int, default=DEFAULT_SIMILARITY_TOP_K,
help="Number of similar chunks to retrieve")
parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT,
help="Query timeout in seconds")
parser.add_argument("--max-retries", type=int, default=MAX_RETRIES,
help="Number of retry attempts on timeout")
args = parser.parse_args()
# Handle global commands first
if args.list_projects:
list_projects(verbose=args.verbose)
return
if args.show_config:
show_config(args)
return
if args.repair:
repair_project(args.repair, verbose=args.verbose)
return
# Validate project-specific commands
if not hasattr(args, 'project') or not args.project:
print("❌ Project name is required for this action")
parser.print_help()
sys.exit(1)
if not validate_project_name(args.project):
print("❌ Invalid project name. Only alphanumeric, underscore and hyphen characters are allowed.")
sys.exit(1)
# Device selection
if args.gpu and args.cpu:
print("❌ Cannot force both GPU and CPU modes")
sys.exit(1)
device = get_device(force_cpu=args.cpu)
if args.gpu and device != "cuda":
print("⚠️ GPU requested but not available - falling back to CPU")
device = "cpu"
if args.verbose:
print(f"\n⚙️ Configuration:")
print(f" Device: {device.upper()} ({'✅ GPU' if device == 'cuda' else '⚠️ CPU'})")
print(f" Model: {args.model}")
print(f" Embed Model: {args.embed_model}")
if hasattr(args, 'project'):
print(f" Project: {args.project}")
print(f" Index Location: {Path(INDEX_ROOT) / args.project}\n")
# Handle project actions
if args.analyze:
analyze_project(args.project, args.verbose)
elif args.reindex:
build_index(
project=args.project,
codebase_path=Path(args.reindex),
embed_model=args.embed_model,
device=device,
clean=args.clean,
dry_run=args.dry_run,
verbose=args.verbose,
ignore_file_path=Path(args.ignore_file) if args.ignore_file else None,
chunk_size=args.chunk_size,
chunk_overlap=args.chunk_overlap
)
else:
chat(
project=args.project,
model=args.model,
embed_model=args.embed_model,
temperature=args.temperature,
num_ctx=args.num_ctx,
top_p=args.top_p,
repeat_penalty=args.repeat_penalty,
device=device,
verbose=args.verbose,
similarity_top_k=args.similarity_top_k,
timeout=args.timeout,
max_retries=args.max_retries,
chunk_size=args.chunk_size,
chunk_overlap=args.chunk_overlap
)
if __name__ == "__main__":
main()
@rupertlssmith (Author):

Ollama settings:

OLLAMA_URL=http://localhost:11434
OLLAMA_MODEL=phi4:14b
OLLAMA_TEMPERATURE=0.2
OLLAMA_NUM_CTX=16384
OLLAMA_TOP_P=0.95
OLLAMA_REPEAT_PENALTY=1.1

Index settings:

INDEX_ROOT=~/.codechat/indexes

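Example usage (illustrative; the script is saved as codechat.py here only for the example, and all flags come from the argparse setup above):

python codechat.py --project myproj --reindex /path/to/code --clean --verbose
python codechat.py --project myproj --reindex /path/to/code --dry-run
python codechat.py --project myproj
python codechat.py --project myproj --analyze
python codechat.py --list-projects
python codechat.py --repair myproj

Approximate dependencies (assumed PyPI package names for the split llama-index 0.10+ layout):

pip install chromadb torch python-dotenv pathspec packaging llama-index-core llama-index-llms-ollama llama-index-embeddings-ollama llama-index-vector-stores-chroma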