This example adds the Playwright MCP service to txtai agents.
Start the Playwright MCP server locally.
npx @playwright/mcp@latest --port 8931
from txtai.pipeline import Textractor

# Docling backend, split text by sections
textractor = Textractor(sections=True, backend="docling")

# BERT Paper
textractor("https://arxiv.org/pdf/1810.04805")

# PDF converted to Markdown, split on Markdown sections
# ['## BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding...
from datasets import load_dataset
from sklearn.metrics import accuracy_score
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from txtai.pipeline import HFTrainer


def metrics(pred):
    """
    Computes accuracy for a set of trainer predictions.

    Args:
        pred: prediction output with label_ids and predictions attributes

    Returns:
        dict with an "accuracy" entry
    """

    labels, preds = pred.label_ids, pred.predictions.argmax(-1)

    # Calculate accuracy
    return {"accuracy": accuracy_score(labels, preds)}
from txtai import Embeddings
from txtai.pipeline import Textractor

# NOTE: original snippet assigned `urls` but referenced `url` below (NameError) — fixed
url = "https://github.com/neuml/txtai"

# Semantic chunking of extracted text
textractor = Textractor(chunker="semantic")

# GGML backend with 4-bit quantization
embeddings = Embeddings(backend="ggml", ggml={"quantize": "q4_0"})
embeddings.index((url, x) for x in textractor(url))
embeddings.save("gguf")
from txtai import Agent

# Agent backed by an MCP tool server and an LLM
agent = Agent(
    tools=["http://mcp.server/path"],
    model="LLM path"
)
Blog: https://qwenlm.github.io/blog/qwq-32b/
Model: https://huggingface.co/Qwen/QwQ-32B
License: Apache 2.0
from txtai import LLM
from txtai import Embeddings

# Start the indexing run
# NOTE(review): `stream()` is defined elsewhere in the original article — not shown here
embeddings = Embeddings(content=True)
embeddings.index(stream(), checkpoint="checkpoint dir")

# Elapsed time ⏳ then ⚡💥🔥
# error, power outage, random failure
# Fix the issue 🧑🔧⚙️
from txtai import Embeddings

# Embeddings index with stored content and a semantic graph
embeddings = Embeddings(content=True, graph=True)
embeddings.index(...)

# Standard Vector Search
embeddings.search("vector search query")
| # Vector SQL query | |
| embeddings.search(""" |