Created
July 1, 2024 13:01
-
-
Save davidmezzetti/43a60af6e135a031c0bfb4de02bc11f4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example 1: semantic search over a handful of headlines using LlamaIndex
# with a Faiss-backed vector store.
import polars as pl
import faiss
from llama_index.core.schema import TextNode
from llama_index.core.vector_stores import VectorStoreQuery
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore

# Data to index
data = [
    "US tops 5 million confirmed virus cases",
    "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
    "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
    "The National Park Service warns against sacrificing slower friends in a bear attack",
    "Maine man wins $1M from $25 lottery ticket",
    "Make huge profits without work, earn up to $100,000 a day",
]

# Create vector store
embeddings = HuggingFaceEmbedding(
    model_name="sentence-transformers/nli-mpnet-base-v2",
)

# The flat L2 index is created with 768 dimensions; this has to equal the
# length of the vectors returned by the embedding model above.
store = FaissVectorStore(faiss_index=faiss.IndexFlatL2(768))

# Embed every headline up front and add the nodes in a single call.
# TextNode's identifier field is named `id_` and holds a string, so the
# list position is passed as `id_=str(i)`.
store.add(
    [
        TextNode(
            id_=str(i),
            text=x,
            embedding=embeddings.get_text_embedding(x),
        )
        for i, x in enumerate(data)
    ]
)

# Search with results as documents
query = embeddings.get_text_embedding("feel good story")
results = store.query(VectorStoreQuery(query_embedding=query))

# `results` is a single query-result object, hence the one-element list.
# Expected output: `nodes` is null, and the match is identified only by
# `ids` ("4", the position of the "Maine man wins..." headline in `data`).
pl.from_dicts([results])
# ┌───────┬──────────────┬───────────┐
# │ nodes ┆ similarities ┆ ids       │
# │ ---   ┆ ---          ┆ ---       │
# │ null  ┆ list[f32]    ┆ list[str] │
# ╞═══════╪══════════════╪═══════════╡
# │ null  ┆ [1.83342]    ┆ ["4"]     │
# └───────┴──────────────┴───────────┘
################################################# | |
# Example 2: the same search with txtai. Reuses the `data` list defined in
# the first example.
import polars as pl
import txtai

# Create vector store. Uses SQLite + Faiss.
embeddings = txtai.Embeddings(
    path="sentence-transformers/nli-mpnet-base-v2",
    content=True,
)

# Each record is a dict: `text` is the headline and `length` is an extra
# field computed here, which the SQL query below selects as a column.
embeddings.index({"text": x, "length": len(x)} for x in data)

# Search with dynamic columns and SQL
# The trailing 1 is the result limit; the expected output below has one row.
results = embeddings.search(
    """SELECT id, text, length, score
FROM txtai
WHERE similar('feel good story')""", 1)

# Polars and Pandas DataFrame support
pl.from_dicts(results)
# ┌─────┬─────────────────────────────────┬────────┬──────────┐
# │ id  ┆ text                            ┆ length ┆ score    │
# │ --- ┆ ---                             ┆ ---    ┆ ---      │
# │ str ┆ str                             ┆ i64    ┆ f64      │
# ╞═════╪═════════════════════════════════╪════════╪══════════╡
# │ 4   ┆ Maine man wins $1M from $25 lo… ┆ 42     ┆ 0.08329  │
# └─────┴─────────────────────────────────┴────────┴──────────┘
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment