This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pipeline setup: build the transcription and translation pipelines.
from txtai.pipeline import Transcription, Translation
from txtai.workflow import FileTask

# Transcription instance, created from the given model path
transcribe = Transcription("facebook/wav2vec2-large-960h")

# Create a translation instance (constructed with no arguments)
translate = Translation()
tasks = [ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run initial query and keep the first field of the first result.
# NOTE(review): assumes search() returns a list of (id, score) pairs and that
# ids are positions in `data`; `embeddings` and `data` are defined elsewhere.
uid = embeddings.search("feel good story", 1)[0][0]
print("Initial: ", data[uid])

# Update data: overwrite record 0 locally, then upsert the same id and text
data[0] = "See it: baby panda born"
embeddings.upsert([(0, data[0], None)])

# Re-run the identical query so the two printed results can be compared
uid = embeddings.search("feel good story", 1)[0][0]
print("After update: ", data[uid])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install txtai
from txtai.embeddings import Embeddings
import requests

# Embeddings with sentence-transformers backend
embeddings = Embeddings({"method": "transformers", "path": "sentence-transformers/paraphrase-mpnet-base-v2"})

# Query HN. The timeout prevents the script from blocking forever on a stalled
# connection, and raise_for_status() reports HTTP errors before JSON parsing.
response = requests.get("https://hn.algolia.com/api/v1/search?tags=front_page", timeout=30)
response.raise_for_status()

# Keep only the title of each returned hit
data = [x["title"] for x in response.json()["hits"]]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install txtai
from txtai.pipeline import Textractor

# Extract text from document into string
# (the return value is not captured here)
textractor = Textractor()
textractor("article.pdf")

# Extract text from document as list of sentences
textractor = Textractor(sentences=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datasets import load_dataset
from txtai.pipeline import HFTrainer, Labels

# Load 500 GLUE sst2 sentiment records (remove .select to train on full sst2 dataset)
ds = load_dataset("glue", "sst2")["train"].select(range(500)).flatten_indices()

# Train sentiment classifier with txtai, starting from bert-base-uncased.
# NOTE(review): columns presumably names the text and label fields - confirm.
trainer = HFTrainer()
model, tokenizer = trainer("bert-base-uncased", ds, columns=("sentence", "label"))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import pipeline
from txtai.embeddings import Embeddings
from txtai.models import OnnxModel
from txtai.pipeline import HFOnnx, Labels

# Export the text-classification model to classify.onnx
path = "distilbert-base-uncased-finetuned-sst-2-english"
onnx = HFOnnx()
model = onnx(path, "text-classification", "classify.onnx")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import pipeline
from txtai.pipeline import HFTrainer
# Training data | |
data = [ | |
{"question": "What ingredient?", "context": "Pour 1 can whole tomatoes", "answers": "tomatoes"}, | |
{"question": "What ingredient?", "context": "Dice 1 yellow onion", "answers": "onion"}, | |
{"question": "What ingredient?", "context": "Cut 1 red pepper", "answers": "pepper"}, | |
{"question": "What ingredient?", "context": "Peel and dice 1 clove garlic", "answers": "garlic"}, | |
{"question": "What ingredient?", "context": "Put 1/2 lb beef", "answers": "beef"}, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from txtai.pipeline import Similarity

# Use default sentiment analysis model
similarity = Similarity(dynamic=False)

# Query with label text (results are not captured here)
similarity("positive", ["I am happy", "I am mad"])

# Query with label id
similarity("1", ["I am happy", "I am mad"])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datasets import load_dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from transformers import pipeline
from txtai.models import Models
from txtai.pipeline import MLOnnx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import random

import numpy as np
import torch
from torch import nn
from torch.nn import CrossEntropyLoss
from transformers import AutoConfig, AutoTokenizer