MongoDB Atlas Vector Search + LangChain Agents = ✨MAGIC✨
# https://www.mongodb.com/docs/atlas/atlas-search/knn-beta/
import os

import pymongo
from langchain.agents import initialize_agent, Tool
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import AzureOpenAI

MONGO_URI = "mongodb+srv://....mongodb.net/?retryWrites=true&w=majority&tls=true"
MONGODB_DATABASE = "demo_db"
MONGODB_COLLECTION = "demo_coll"
QUESTION = "Why should a vehicle rental company consider applying Internet of Things to their business now using MongoDB?"

os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_KEY"] = ""
os.environ["OPENAI_API_BASE"] = "https://.openai.azure.com"

# Azure OpenAI embedding model used to vectorize the agent's query
azureEmbeddings = OpenAIEmbeddings(
    deployment="",
    model="text-embedding-ada-002",
    openai_api_base="https://.openai.azure.com",
    openai_api_key="",
    openai_api_type="azure",
)

# Azure OpenAI completion model that drives the agent
llm = AzureOpenAI(
    deployment_name="",
    model_name="gpt-35-turbo",
    openai_api_base="https://.openai.azure.com",
    openai_api_key="",
    temperature=0.7,
)


def get_embedding(text):
    return azureEmbeddings.embed_query(text)


def vs_tool(Q):
    # Q is the "thought" coming from the agent
    vectorizedQuery = get_embedding(str(Q))

    # Connect to the MongoDB server and get the collection
    client = pymongo.MongoClient(MONGO_URI)
    collection = client[MONGODB_DATABASE][MONGODB_COLLECTION]

    # Approximate nearest-neighbor search on the "embeddings" field
    pipeline = [
        {
            "$search": {
                "knnBeta": {
                    "vector": vectorizedQuery,
                    "path": "embeddings",
                    "k": 50,
                }
            }
        },
        {
            "$project": {"embeddings": 0}
        },
        {
            "$limit": 5  # let's assume the first 5 chunks are the most useful
        },
    ]
    results = collection.aggregate(pipeline)

    # Use the vector-search results to provide an enhanced context for the AI model
    context = ""
    for i, result in enumerate(results):
        context += "\n" + str(i) + ") " + result["content"] + "\n"
    return (
        "You are a MongoDB expert. You always think analytically and critically. "
        "Summarize and use this context: " + context + " \n to answer the question"
    )


def main():
    vector_search_tool = Tool.from_function(
        func=vs_tool,
        name="MongoDB Modernization Tool",
        description="useful for when you need to answer questions about Car Rental companies, MongoDB and IoT.",
        # coroutine= ... <- you can specify an async method if desired as well
    )
    agent = initialize_agent(
        agent="zero-shot-react-description",
        tools=[vector_search_tool],
        llm=llm,
        verbose=True,  # verbose=True shows how the agent decides which tool to call
        max_iterations=9,
    )
    r_1 = agent("Use your tools to answer: " + QUESTION)
    print(f"Final answer: {r_1['output']}")


if __name__ == "__main__":
    main()
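The script above assumes demo_coll is already populated with documents that have a "content" field (the text chunk) and an "embeddings" field (its vector), since vs_tool searches on path "embeddings" and reads result["content"]. Below is a minimal ingestion sketch under those assumptions; the sample chunks are hypothetical, and get_embedding, MONGO_URI, MONGODB_DATABASE, and MONGODB_COLLECTION are the names defined in the gist.

# Minimal ingestion sketch (assumption, not part of the gist).
import pymongo

client = pymongo.MongoClient(MONGO_URI)
collection = client[MONGODB_DATABASE][MONGODB_COLLECTION]

# Hypothetical text chunks for illustration only
chunks = [
    "IoT telemetry from rental vehicles can be stored as time series data in MongoDB.",
    "MongoDB Atlas scales horizontally, which suits bursty IoT workloads.",
]

for chunk in chunks:
    collection.insert_one({
        "content": chunk,                    # raw text returned to the agent as context
        "embeddings": get_embedding(chunk),  # 1536-dim vector from text-embedding-ada-002
    })

# The knnBeta operator also needs an Atlas Search index on the "embeddings" path,
# roughly like the mapping below (created in the Atlas UI or API):
# {
#   "mappings": {
#     "dynamic": true,
#     "fields": {
#       "embeddings": {
#         "type": "knnVector",
#         "dimensions": 1536,
#         "similarity": "cosine"
#       }
#     }
#   }
# }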