Last active
April 10, 2024 07:29
-
-
Save Cotchi666/e08ef1b3e8563aa7e2f4dac66d1e0a5d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "2dc5dac2-d2f2-4e2d-b0fe-bd21791b411d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"from dotenv import load_dotenv\n", | |
"from operator import itemgetter\n", | |
"load_dotenv()\n", | |
"\n", | |
"from langchain_community.document_loaders import PyPDFLoader\n", | |
"from langchain_text_splitters import CharacterTextSplitter\n", | |
"from langchain.document_loaders.csv_loader import CSVLoader\n", | |
"\n", | |
"from langchain.docstore.document import Document\n", | |
"\n", | |
"from langchain_community.vectorstores import Neo4jVector\n", | |
"\n", | |
"from langchain_openai import OpenAIEmbeddings\n", | |
"from langchain.chat_models import ChatOpenAI\n", | |
"from langchain.chains import RetrievalQAWithSourcesChain\n", | |
"from langchain_core.prompts import ChatPromptTemplate\n", | |
"from langchain_core.runnables import RunnableLambda\n", | |
"from langchain.prompts import PromptTemplate\n", | |
"from langchain.chains.question_answering import load_qa_chain\n", | |
"from langchain.chat_models import ChatOpenAI\n", | |
"from langchain.memory.chat_message_histories.in_memory import ChatMessageHistory\n", | |
"from langchain.schema import messages_from_dict, messages_to_dict\n", | |
"from langchain.memory import ConversationBufferMemory\n", | |
"from langchain.chains import ConversationalRetrievalChain, ConversationChain\n", | |
"import json" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "018bb9ed-35f9-4262-9c24-6a9eb5dba946", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"embedding_model = OpenAIEmbeddings()\n", | |
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n", | |
"URL = \"neo4j+ssc://ba68e959.databases.neo4j.io\"\n", | |
"USERNAME = \"neo4j\"\n", | |
"PASSWORD = os.getenv('NEO4J_PASSWORD') \n", | |
"\n", | |
"original_chain = ConversationChain(\n", | |
" llm=llm,\n", | |
" verbose=True,\n", | |
" memory=ConversationBufferMemory()\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "3af61675-08fe-443a-afff-fa9340339cc4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"\n", | |
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", | |
"Prompt after formatting:\n", | |
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n", | |
"\n", | |
"Current conversation:\n", | |
"Human: what do you know about Python in less than 10 words\n", | |
"AI: Python is a high-level programming language used for various applications.\n", | |
"Human: what do you know about Python in less than 10 words\n", | |
"AI:\u001b[0m\n", | |
"\n", | |
"\u001b[1m> Finished chain.\u001b[0m\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"'Python is known for its readability and versatility.'" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"original_chain.run('what do you know about Python in less than 10 words')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "d2e89ec8-20dc-422a-a760-9059c307831e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[HumanMessage(content='what do you know about Python in less than 10 words'),\n", | |
" AIMessage(content='Python is a high-level programming language used for various applications.'),\n", | |
" HumanMessage(content='what do you know about Python in less than 10 words'),\n", | |
" AIMessage(content='Python is known for its readability and versatility.')]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Grab the message objects accumulated in the chain's buffer memory.\n", | |
"extracted_messages = original_chain.memory.chat_memory.messages\n", | |
"extracted_messages" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "127ab7a5-8255-43c3-b11f-22369a9ba1cf", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'type': 'human',\n", | |
" 'data': {'content': 'what do you know about Python in less than 10 words',\n", | |
" 'additional_kwargs': {},\n", | |
" 'response_metadata': {},\n", | |
" 'type': 'human',\n", | |
" 'name': None,\n", | |
" 'id': None,\n", | |
" 'example': False}},\n", | |
" {'type': 'ai',\n", | |
" 'data': {'content': 'Python is a high-level programming language used for various applications.',\n", | |
" 'additional_kwargs': {},\n", | |
" 'response_metadata': {},\n", | |
" 'type': 'ai',\n", | |
" 'name': None,\n", | |
" 'id': None,\n", | |
" 'example': False}},\n", | |
" {'type': 'human',\n", | |
" 'data': {'content': 'what do you know about Python in less than 10 words',\n", | |
" 'additional_kwargs': {},\n", | |
" 'response_metadata': {},\n", | |
" 'type': 'human',\n", | |
" 'name': None,\n", | |
" 'id': None,\n", | |
" 'example': False}},\n", | |
" {'type': 'ai',\n", | |
" 'data': {'content': 'Python is known for its readability and versatility.',\n", | |
" 'additional_kwargs': {},\n", | |
" 'response_metadata': {},\n", | |
" 'type': 'ai',\n", | |
" 'name': None,\n", | |
" 'id': None,\n", | |
" 'example': False}}]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Convert the message objects to plain dicts (presumably the form that would be written to a database, per the variable name).\n", | |
"ingest_to_db = messages_to_dict(extracted_messages)\n", | |
"ingest_to_db" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "62f9dc2e-5d6d-4d43-a83a-5994a93f7df5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'type': 'human',\n", | |
" 'data': {'content': 'what do you know about Python in less than 10 words',\n", | |
" 'additional_kwargs': {},\n", | |
" 'response_metadata': {},\n", | |
" 'type': 'human',\n", | |
" 'name': None,\n", | |
" 'id': None,\n", | |
" 'example': False}},\n", | |
" {'type': 'ai',\n", | |
" 'data': {'content': 'Python is a high-level programming language used for various applications.',\n", | |
" 'additional_kwargs': {},\n", | |
" 'response_metadata': {},\n", | |
" 'type': 'ai',\n", | |
" 'name': None,\n", | |
" 'id': None,\n", | |
" 'example': False}},\n", | |
" {'type': 'human',\n", | |
" 'data': {'content': 'what do you know about Python in less than 10 words',\n", | |
" 'additional_kwargs': {},\n", | |
" 'response_metadata': {},\n", | |
" 'type': 'human',\n", | |
" 'name': None,\n", | |
" 'id': None,\n", | |
" 'example': False}},\n", | |
" {'type': 'ai',\n", | |
" 'data': {'content': 'Python is known for its readability and versatility.',\n", | |
" 'additional_kwargs': {},\n", | |
" 'response_metadata': {},\n", | |
" 'type': 'ai',\n", | |
" 'name': None,\n", | |
" 'id': None,\n", | |
" 'example': False}}]" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# JSON encode/decode round trip standing in for a store-and-load; no real database is touched here.\n", | |
"retrieve_from_db = json.loads(json.dumps(ingest_to_db))\n", | |
"retrieve_from_db" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "79af6011-fb24-45d0-9bab-2a3622f0346a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[HumanMessage(content='what do you know about Python in less than 10 words'),\n", | |
" AIMessage(content='Python is a high-level programming language used for various applications.'),\n", | |
" HumanMessage(content='what do you know about Python in less than 10 words'),\n", | |
" AIMessage(content='Python is known for its readability and versatility.')]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Turn the deserialised dicts back into LangChain message objects.\n", | |
"retrieved_messages = messages_from_dict(retrieve_from_db)\n", | |
"retrieved_messages" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "cf1630cb-31b1-4e6c-9d71-e7caead1efbb", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"ChatMessageHistory(messages=[HumanMessage(content='what do you know about Python in less than 10 words'), AIMessage(content='Python is a high-level programming language used for various applications.'), HumanMessage(content='what do you know about Python in less than 10 words'), AIMessage(content='Python is known for its readability and versatility.')])" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Wrap the restored messages in a ChatMessageHistory.\n", | |
"retrieved_chat_history = ChatMessageHistory(messages=retrieved_messages)\n", | |
"retrieved_chat_history" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "09cb16e5-374d-4b85-a887-9e5f171acca7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=[HumanMessage(content='what do you know about Python in less than 10 words'), AIMessage(content='Python is a high-level programming language used for various applications.'), HumanMessage(content='what do you know about Python in less than 10 words'), AIMessage(content='Python is known for its readability and versatility.')]))" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Build a buffer memory backed by the restored history so a new chain can pick up the conversation.\n", | |
"retrieved_memory = ConversationBufferMemory(chat_memory=retrieved_chat_history)\n", | |
"retrieved_memory" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "d8e7d022-8291-4027-938c-5d8450e9dca4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"\n", | |
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", | |
"Prompt after formatting:\n", | |
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n", | |
"\n", | |
"Current conversation:\n", | |
"Human: what do you know about Python in less than 10 words\n", | |
"AI: Python is a high-level programming language used for various applications.\n", | |
"Human: what do you know about Python in less than 10 words\n", | |
"AI: Python is known for its readability and versatility.\n", | |
"Human: what about Javascript and Ruby\n", | |
"AI:\u001b[0m\n", | |
"\n", | |
"\u001b[1m> Finished chain.\u001b[0m\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"'JavaScript is a popular scripting language for web development. Ruby is a dynamic, object-oriented programming language.'" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"reloaded_chain = ConversationChain(\n", | |
" llm=llm,\n", | |
" verbose=True,\n", | |
" memory=retrieved_memory\n", | |
")\n", | |
"reloaded_chain.run('what about Javascript and Ruby')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"id": "10b9e312-9cab-4e69-a1d4-8d3c1b84f327", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"vectorstore = Neo4jVector.from_existing_index(\n", | |
" embedding_model,\n", | |
" url=URL,\n", | |
" username=USERNAME,\n", | |
" password=PASSWORD,\n", | |
" node_label=\"Chunk\",\n", | |
" index_name=\"vector\",\n", | |
")\n", | |
"temp_history = []\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"id": "cdb227cd-0e74-495e-a557-46215d7cbfea", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'<tag>Chào Finn, rất vui được gặp bạn! Tôi sẽ ghi nhớ tên của bạn và sử dụng nó trong các cuộc trò chuyện tiếp theo. Bạn có thể cho tôi biết thêm về bản thân mình không? Tôi có thể giúp gì cho bạn?</tag>'" | |
] | |
}, | |
"execution_count": 60, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"\n", | |
"template = \"\"\"\n", | |
"Given the following extracted parts of a long document and a question, create a final answer,\n", | |
"The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context.\n", | |
"If the AI does not know the answer to a question, it truthfully says it does not know.\n", | |
"When providing responses to users, ensure they are presented using only tag in HTML.\n", | |
"{context}\n", | |
"Relevant pieces of previous conversation:\n", | |
"{chat_history}\n", | |
"(You do not need to use these pieces of information if not relevant)\n", | |
"Answer language: Vietnamese\n", | |
"Human: {human_input}\n", | |
"Chatbot:\n", | |
"\"\"\"\n", | |
"prompt = PromptTemplate(\n", | |
" input_variables=[\"chat_history\", \"human_input\", \"context\"],\n", | |
" template=template,\n", | |
")\n", | |
"\n", | |
"# Convert chat history to LangChain message format\n", | |
"retrieved_messages = messages_from_dict(temp_history)\n", | |
"retrieved_chat_history = ChatMessageHistory(messages=retrieved_messages)\n", | |
"\n", | |
"# Set up conversation memory\n", | |
"memory = ConversationBufferMemory(\n", | |
" memory_key=\"chat_history\",\n", | |
" input_key=\"human_input\",\n", | |
" chat_memory=retrieved_chat_history,\n", | |
")\n", | |
"\n", | |
"# Load LangChain model\n", | |
"chain = load_qa_chain(llm, chain_type=\"stuff\", memory=memory, prompt=prompt)\n", | |
"\n", | |
"# Process query with LangChain 1\n", | |
"query = \"Tôi tên gì?\"\n", | |
"docs = vectorstore.similarity_search(query)\n", | |
"rs = chain(\n", | |
" {\"input_documents\": docs, \"human_input\": query},\n", | |
" return_only_outputs=True,\n", | |
")\n", | |
"new_chat_history = messages_to_dict(chain.memory.chat_memory.messages)\n", | |
"temp_history = new_chat_history\n", | |
"rs[\"output_text\"]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "2151a455-336c-4aeb-95fc-6d2be67d0346", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment