Created
January 23, 2022 12:03
-
-
Save davidmezzetti/75a2423d14b9ffb02fe846319eaafdf4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from txtai.embeddings import Embeddings

# Sample headlines that make up the searchable corpus.
data = [
    "US tops 5 million confirmed virus cases",
    "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
    "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
    "The National Park Service warns against sacrificing slower friends in a bear attack",
    "Maine man wins $1M from $25 lottery ticket",
    "Make huge profits without work, earn up to $100,000 a day",
]

# Create embeddings index with content enabled. The default behavior is to only store indexed vectors.
# NOTE(review): "objects" is enabled as well, although this script only reads the "text" field
# back from the results - confirm it is actually needed.
embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2", "content": True, "objects": True})

# Create an index for the list of text; each row is (id, text, None) with the
# position in `data` used as the id.
embeddings.index([(uid, text, None) for uid, text in enumerate(data)])

# Header for the two-column report: query left-justified in 20 characters, then the match.
print(f"{'Query':<20} Best Match")
print("-" * 50)

queries = (
    "feel good story",
    "climate change",
    "public health story",
    "war",
    "wildlife",
    "asia",
    "lucky",
    "dishonest junk",
)

# Run an embeddings search for each query
for query in queries:
    # Ask for a single result and extract its text field
    text = embeddings.search(query, 1)[0]["text"]

    # Print the query next to its best match
    print(f"{query:<20} {text}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment