Last active
August 18, 2021 14:30
-
-
Save davidmezzetti/30117c7c93fa81e92d670bc5daebc879 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from txtai.embeddings import Embeddings

# Create embeddings model, backed by sentence-transformers & transformers
embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})

# Candidate texts that each query below is matched against
data = ["US tops 5 million confirmed virus cases",
        "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
        "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
        "The National Park Service warns against sacrificing slower friends in a bear attack",
        "Maine man wins $1M from $25 lottery ticket",
        "Make huge profits without work, earn up to $100,000 a day"]

# Two-column report: query left-aligned in 20 characters, then the matched text
print("%-20s %s" % ("Query", "Best Match"))
print("-" * 50)

for query in ("feel good story", "climate change", "public health story", "war", "wildlife",
              "asia", "lucky", "dishonest junk"):
    # Take the id from the first similarity result and use it as an index into data.
    # NOTE(review): this relies on similarity() returning (id, score) pairs ordered
    # best-first - confirm against the txtai version in use.
    uid = embeddings.similarity(query, data)[0][0]

    print("%-20s %s" % (query, data[uid]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment