from txtai.pipeline import Textractor
# Custom HTTP headers passed through to the Textractor pipeline
request_headers = {"user-agent": "Mozilla/5.0"}

# Text extraction pipeline using the docling backend
textractor = Textractor(backend="docling", headers=request_headers)

# Run extraction against a remote PNG image
image_url = "https://miro.medium.com/v2/resize:fit:720/format:webp/1*HHPVwIrcxYcLRvjDpwLQyQ.png"
textractor(image_url)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import soundfile as sf | |
| from txtai.pipeline import TextToSpeech | |
| # Build pipeline | |
| tts = TextToSpeech("neuml/kokoro-int8-onnx") | |
| # Generate speech | |
| speech, rate = tts( | |
| """Have you ever considered having a snooty British accent? |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from smolagents import WebSearchTool | |
| from txtai import LLM | |
| def webrag(query): | |
| prompt = f""" | |
| Answer the following question using ONLY the context below. | |
| Query: {query} | |
| Context: {search(query)} | |
| """ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from txtai.pipeline import Textractor

# Docling backend, with extracted text split by sections
textractor = Textractor(backend="docling", sections=True)

# BERT paper (PDF hosted on arXiv)
paper_url = "https://arxiv.org/pdf/1810.04805"
textractor(paper_url)

# PDF converted to Markdown, split on Markdown sections
# ['## BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datasets import load_dataset
from sklearn.metrics import accuracy_score
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from txtai.pipeline import HFTrainer


def metrics(pred):
    """
    Computes accuracy for a set of predictions.

    Args:
        pred: object exposing label_ids (expected labels) and predictions (per-class scores);
              presumably a transformers EvalPrediction - confirm against the caller

    Returns:
        dict with a single "accuracy" entry
    """

    expected = pred.label_ids

    # Take the index of the highest score along the last axis as the predicted label
    predicted = pred.predictions.argmax(-1)

    # Calculate accuracy
    score = accuracy_score(expected, predicted)
    return {"accuracy": score}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from txtai import Embeddings
from txtai.pipeline import Textractor

# Page to extract and index. The name must be `url`: both the id/text pairs and the
# textractor call below read `url` (the original assigned `urls`, raising NameError).
url = "https://github.com/neuml/txtai"

# Text extraction pipeline configured with the "semantic" chunker
textractor = Textractor(chunker="semantic")

# Embeddings using the ggml backend with q4_0 quantization
embeddings = Embeddings(backend="ggml", ggml={"quantize": "q4_0"})

# Index every chunk produced by the textractor, each paired with the source url
embeddings.index((url, x) for x in textractor(url))

# Persist the index to the "gguf" path
embeddings.save("gguf")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from txtai import Agent

# Tools are given as a URL - presumably an MCP server endpoint, going by the host name (placeholder value)
tools = ["http://mcp.server/path"]

# "LLM path" is a placeholder; replace with the path of the model to run
agent = Agent(tools=tools, model="LLM path")
