Skip to content

Instantly share code, notes, and snippets.

View davidmezzetti's full-sized avatar

David Mezzetti davidmezzetti

View GitHub Profile

Results were produced with txtai's benchmark script. All vectors were generated using all-MiniLM-L6-v2.

Index and search times are similar for all methods. For larger sources (like FiQA), the index time will be lower as IVF training is not required.

ArguAna

| Method | Disk (MB) | NDCG_10 |
| --- | --- | --- |
| Faiss IVF | 13.7 MB | 0.4761 |
| Faiss IVF SQ4 | 2.3 MB | 0.4739 |
from unsloth import FastModel
from datasets import load_dataset
from txtai.pipeline import HFTrainer
# Load training dataset
train = load_dataset(...)
# Full fine-tune (16-bit)
# Set load_in_4bit=True and trainer.lora=True to train a QLoRA model
from txtai import Embeddings
# SQLite + NumPy
embeddings = Embeddings(content=True, backend="numpy")
embeddings.index(["test"])
embeddings.save("test")
# SQLite File
# sqlite3 test/documents "SELECT * FROM sections"
# 0|0|test||2026-01-16 16:12:03.410194+00:00
# Install TxtAI with Torch CPU
pip install txtai torch==2.9.1+cpu -f https://download.pytorch.org/whl/torch
# Disk usage is much lower
# Torch CPU = 1.1G
# Torch CUDA = 6.9G
# Number of installed packages is also lower
# Torch CPU = 27
# Torch CUDA = 43
#
# pip install ai-edge-torch txtai
#
# See https://github.com/google-ai-edge/ai-edge-torch
import torch
import ai_edge_torch
import numpy as np
from txtai import Embeddings
from txtai.pipeline import HFOnnx
# Export as quantized onnx model
# Model Size = 1.1 MB!
path = HFOnnx()(
"neuml/biomedbert-hash-nano-embeddings",
"pooling", "model.onnx", True
)
from txtai import Embeddings
# Load the 100K most viewed Wikipedia articles
embeddings = Embeddings()
embeddings.load(provider="huggingface-hub", container="neuml/txtai-wikipedia-slim")
# Show a random article
embeddings.search("""
SELECT id, text, 'https://en.wikipedia.org/wiki/' || replace(id, ' ', '_') url
FROM txtai
from txtai import Agent
# Define tools
tools = [
"websearch", # Runs a websearch using default engine
"webview", # Loads a web page
]
# Define LLM
model = "Qwen/Qwen3-4B-Instruct-2507"
import numpy as np
from txtai.ann import ANNFactory
# Index 10M vectors using llama.cpp style quants
ann = None
for _ in range(1000):
# Generate batch of vectors
batch = np.random.rand(10000, 768).astype(np.float32)
if not ann:
# RAG Quick Start
# An easy way to get started with RAG using YOUR data
#
# For a complete application see this: https://github.com/neuml/rag
#
# TxtAI has 70+ example notebooks covering everything the framework provides
# Examples: https://neuml.github.io/txtai/examples
#
# Install TxtAI
# pip install txtai[pipeline-data]