Skip to content

Instantly share code, notes, and snippets.

@ranfysvalle02
Last active February 23, 2024 04:59
Show Gist options
  • Save ranfysvalle02/ea684e630346ef8bdb99433569b431f9 to your computer and use it in GitHub Desktop.
MongoDB + Fireworks
# --- Configuration --------------------------------------------------------
# NOTE(review): credentials and the connection string are hard-coded
# placeholders; in real code load them from environment variables or a
# secrets manager rather than committing them to source.
API_KEY = "API_KEY_HERE"  # Fireworks.ai API key (placeholder)
MODEL = "accounts/fireworks/models/mixtral-8x7b-instruct"  # chat model id
QUESTION = "What is MongoDB?"  # question answered via RAG below
MONGODB_URI = "mongodb+srv://<uri-goes-here>"  # Atlas connection string (placeholder)
import openai
from pydantic import BaseModel
import json
class Result(BaseModel):
    """Schema for the structured JSON reply requested from the model.

    Passed (as ``schema_json()``) in the ``response_format`` of the chat
    completion call below so the model returns ``{"answer": "..."}``.
    """

    # The model's answer text, extracted from the JSON reply at the end
    # of the script.
    answer: str
# Fireworks.ai exposes an OpenAI-compatible inference endpoint, so the
# standard OpenAI client works by pointing base_url at Fireworks' API.
client = openai.OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=API_KEY,
)
# --- MongoDB + embeddings setup ------------------------------------------
# Connect to MongoDB Atlas, where the vector-indexed documents live.
import pymongo
from langchain.embeddings import GPT4AllEmbeddings

# Local GPT4All embedding model used to vectorize queries.
# NOTE(review): the documents' stored "embedding" field must have been
# produced by the same model for the vector search below to be meaningful
# — confirm against the ingestion pipeline.
gpt4all_embd = GPT4AllEmbeddings()
mdb_client = pymongo.MongoClient(MONGODB_URI)
db = mdb_client["apollo-salesops"]  # database
collection = db["irag"]  # collection holding source/text/embedding docs
def recall(
    text, n_docs=2, min_rel_score=0.25, chunk_max_length=1800, unique=True
):
    """Retrieve supporting context for *text* via Atlas Vector Search.

    Embeds *text* with the module-level GPT4All model, runs a
    ``$vectorSearch`` aggregation against the ``irag`` collection, and
    formats the top hits into a knowledge-base snippet for the prompt.

    Args:
        text: Query text to embed and search for.
        n_docs: Maximum number of documents to include in the output.
        min_rel_score: Minimum vector-search relevance score a hit must
            reach to be kept.
        chunk_max_length: Each document's text is truncated to this many
            characters.
        unique: If True, skip hits whose ``source`` was already collected.

    Returns:
        A formatted string listing the matched chunks and their sources.
    """
    response = collection.aggregate(
        [
            {
                "$vectorSearch": {
                    "index": "default",
                    "queryVector": gpt4all_embd.embed_query(text),
                    "path": "embedding",
                    # "filter": {},
                    # limit: int number of documents to return; it cannot
                    # exceed numCandidates.
                    "limit": 15,
                    # numCandidates: nearest neighbors considered during
                    # the search; must be >= limit.
                    "numCandidates": 50,
                }
            },
            # Surface the relevance score, drop weak matches, and keep
            # only the fields the prompt needs.
            {"$addFields": {"score": {"$meta": "vectorSearchScore"}}},
            {"$match": {"score": {"$gte": min_rel_score}}},
            {"$project": {"score": 1, "_id": 0, "source": 1, "text": 1}},
        ]
    )
    tmp_docs = []
    str_response = []
    # Iterate over the results, stopping once n_docs have been collected.
    for d in response:
        if len(tmp_docs) == n_docs:
            break
        if unique and d["source"] in tmp_docs:
            continue  # de-duplicate by source URL
        tmp_docs.append(d["source"])
        str_response.append(
            {
                "URL": d["source"],
                "content": d["text"][:chunk_max_length],
                "score": d["score"],
            }
        )
    kb_output = (
        f"RAG Knowledgebase Results[{len(tmp_docs)}]:\n```{str(str_response)}```\n## \n```SOURCES: "
        + str(tmp_docs)
        + "```\n\n"
    )
    return kb_output
# Build the RAG-augmented prompt ONCE. The original computed it twice
# (once for the API call, once for the debug print), which embedded the
# question and queried MongoDB redundantly — and could even print
# different context than what was actually sent to the model.
prompt = f"Using this context: {recall(QUESTION)} \n\n Answer the following question: {QUESTION} [important]Reply just in one JSON.[/important]"

# Ask the model, constraining the reply to the Result JSON schema.
chat_completion = client.chat.completions.create(
    model=MODEL,
    response_format={"type": "json_object", "schema": Result.schema_json()},
    messages=[
        {
            "role": "user",
            "content": prompt,
        },
    ],
)
print("QUESTION:" + QUESTION)
print("\n\n" + prompt)
print("\n\n")
# The model replies with a JSON object matching Result; extract the answer.
print(json.loads(chat_completion.choices[0].message.content)["answer"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment