MongoDB Atlas Vector Search + LangChain Agents = ✨MAGIC✨
# https://www.mongodb.com/docs/atlas/atlas-search/knn-beta/
import os

import pymongo
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import AzureOpenAI
from langchain.agents import initialize_agent, Tool

MONGO_URI = "mongodb+srv://....mongodb.net/?retryWrites=true&w=majority&tls=true"
MONGODB_DATABASE = "demo_db"
MONGODB_COLLECTION = "demo_coll"
QUESTION = "Why should a vehicle rental company consider applying Internet of Things to their business now using MongoDB?"

os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_KEY"] = ""
os.environ["OPENAI_API_BASE"] = "https://.openai.azure.com"

azureEmbeddings = OpenAIEmbeddings(
    deployment="",
    model="text-embedding-ada-002",
    openai_api_base="https://.openai.azure.com",
    openai_api_key="",
    openai_api_type="azure",
)

llm = AzureOpenAI(
    deployment_name="",
    model_name="gpt-35-turbo",
    openai_api_base="https://.openai.azure.com",
    openai_api_key="",
    temperature=0.7,
)


def get_embedding(text):
    return azureEmbeddings.embed_query(text)


def vs_tool(Q):
    # Q is the "thought" coming from the agent
    vectorizedQuery = get_embedding(str(Q))

    # Connect to the MongoDB server and get the collection
    client = pymongo.MongoClient(MONGO_URI)
    collection = client[MONGODB_DATABASE][MONGODB_COLLECTION]

    pipeline = [
        {
            "$search": {
                "knnBeta": {
                    "vector": vectorizedQuery,
                    "path": "embeddings",
                    "k": 50
                }
            }
        },
        {
            "$project": {"embeddings": 0}
        },
        {
            "$limit": 5  # assume the first 5 chunks are the most useful
        }
    ]
    results = collection.aggregate(pipeline)

    # Use the vector-search results to provide an enhanced context for the AI model
    context = ""
    for i, result in enumerate(results):
        context += "\n" + str(i) + ") " + result["content"] + "\n"
    return ("You are a MongoDB expert. You always think analytically and critically. "
            "Summarize and use this context: " + context + " \nto answer the question")


def main():
    vector_search_tool = Tool.from_function(
        func=vs_tool,
        name="MongoDB Modernization Tool",
        description="useful for when you need to answer questions about car rental companies, MongoDB and IoT.",
        # coroutine= ... <- you can also specify an async method if desired
    )
    agent = initialize_agent(
        agent="zero-shot-react-description",
        tools=[vector_search_tool],
        llm=llm,
        verbose=True,  # verbose=True shows how the agent decides to call the tool
        max_iterations=9
    )
    r_1 = agent("Use your tools to answer: " + QUESTION)
    print(f"Final answer: {r_1['output']}")


main()
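
Note that the knnBeta stage only returns results if the collection has an Atlas Search index mapping the embeddings field as a knnVector. The snippet below is a minimal sketch of such an index definition, written as a Python dict so it mirrors the JSON you would paste into the Atlas Search index editor; the 1536 dimensions match text-embedding-ada-002 output, while the cosine similarity choice and the assumed document shape are illustrative assumptions you may need to adjust for your own data.

# Hypothetical Atlas Search index definition for the "embeddings" field
# (paste the equivalent JSON into the Atlas Search index editor).
index_definition = {
    "mappings": {
        "dynamic": True,
        "fields": {
            "embeddings": {
                "type": "knnVector",
                "dimensions": 1536,      # text-embedding-ada-002 vector size
                "similarity": "cosine"   # "euclidean" or "dotProduct" also work
            }
        }
    }
}

# Each searched document is assumed to store its raw text in "content"
# (returned to the agent as context) and its vector in "embeddings", e.g.:
# {"content": "<chunk text>", "embeddings": get_embedding("<chunk text>")}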