Skip to content

Instantly share code, notes, and snippets.

View davidmezzetti's full-sized avatar

David Mezzetti davidmezzetti

View GitHub Profile
##################################
# Data functions
##################################
import re
from datasets import load_dataset
def clean(text):
    """Normalize a text string for indexing.

    Replaces newlines with spaces and strips leading/trailing whitespace.

    Args:
        text: input string

    Returns:
        cleaned string
    """

    # Bug fix: the original computed the cleaned value but never returned it,
    # so callers received None.
    text = text.replace("\n", " ").strip()
    return text
from txtai import LLM

# Three equivalent ways to construct a txtai LLM. Each call below rebinds
# `llm`, so only the last assignment is in effect — the earlier ones are
# illustrative alternatives.

# Hugging Face Hub model id
llm = LLM("google/gemma-2-9b")
# llama.cpp GGUF model, automatically downloaded from the HF Hub
llm = LLM("bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf")
# Models served via APIs (OpenAI / Claude / Ollama)
llm = LLM("gpt-4o")
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_text_splitters import RecursiveCharacterTextSplitter
system = (
"You are an assistant for question-answering tasks. "
##################################
# Data functions
##################################
import re
from datasets import load_dataset
def clean(text):
    """Normalize a text string for indexing.

    Replaces newlines with spaces and strips leading/trailing whitespace.

    Args:
        text: input string

    Returns:
        cleaned string
    """

    # Bug fix: the original computed the cleaned value but never returned it,
    # so callers received None.
    text = text.replace("\n", " ").strip()
    return text
import polars as pl
import faiss
from llama_index.core.schema import TextNode
from llama_index.core.vector_stores import VectorStoreQuery
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore
# Data to index
data = [
"US tops 5 million confirmed virus cases",
from txtai import Embeddings, LLM
# Data to index
data = [
"US tops 5 million confirmed virus cases",
"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
"Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
"The National Park Service warns against sacrificing slower friends in a bear attack",
"Maine man wins $1M from $25 lottery ticket",
"Make huge profits without work, earn up to $100,000 a day"
import polars as pl
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
# Data to index
data = [
"US tops 5 million confirmed virus cases",
"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
"Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
"The National Park Service warns against sacrificing slower friends in a bear attack",
from txtai import Embeddings
# Sample corpus to index: five news-style headlines plus one spam-like
# message (useful for similarity/classification demos).
data = [
    "US tops 5 million confirmed virus cases",
    "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
    "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
    "The National Park Service warns against sacrificing slower friends in a bear attack",
    "Maine man wins $1M from $25 lottery ticket",
    "Make huge profits without work, earn up to $100,000 a day"
]
embeddings:
path: sentence-transformers/nli-mpnet-base-v2
content: true
functions:
- {name: translation, argcount: 2, function: translation}
tabular:
idcolumn: url
textcolumns:
- title
translation:
summary:
path: sshleifer/distilbart-cnn-12-6
textractor:
join: true
lines: false
minlength: 100
paragraphs: true
sentences: false
translation: {}
workflow: