Sample code for ChatGPT / GPT-4 LangChain Q&A
# see https://www.youtube.com/watch?v=CsFpVdgEXCU for details of what this does
from typing import List

from langchain import ConversationChain
from langchain.chat_models import ChatOpenAI
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.memory import ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)
from langchain.schema import Document
from langchain.vectorstores import VectorStore

# retrieved chunks shorter than this (in characters) are dropped as noise
MIN_DOCUMENT_LENGTH = 20
SYSTEM_PROMPT = """
You are Knowledge bot. In each message you will be given the extracted parts of a knowledge base
(labeled with DOCUMENT) and a question.
Answer the question using information from the knowledge base.
If the answer is not available in the documents or there are no documents,
still try to answer the question, but say that you used your general knowledge and not the documentation.
"""

# alternative system prompt that asks the model to cite its sources;
# pair it with DOCUMENT_TEMPLATE_WITH_SOURCE below
SYSTEM_PROMPT_WITH_SOURCES = """
You are Knowledge bot. In each message you will be given the extracted parts of a knowledge base
(labeled with DOCUMENT and SOURCE) and a question.
Answer the question using information from the knowledge base, including references ("SOURCES").
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
"""
DOCUMENT_TEMPLATE = """
------------ BEGIN DOCUMENT -------------
{content}
------------- END DOCUMENT --------------
"""

DOCUMENT_TEMPLATE_WITH_SOURCE = """
------------ BEGIN DOCUMENT -------------
--------------- CONTENT -----------------
{content}
---------------- SOURCE -----------------
{source}
------------- END DOCUMENT --------------
"""

PROMPT_TEMPLATE = """
=========== BEGIN DOCUMENTS =============
{documents}
============ END DOCUMENTS ==============
Question: {question}
"""
def construct_prompt(documents: List[Document], question: str) -> str:
    return PROMPT_TEMPLATE.format(
        documents="\n".join([construct_document_prompt(d) for d in documents]),
        question=question,
    )


def construct_document_prompt(document: Document) -> str:
    # DOCUMENT_TEMPLATE only needs the content; to include citations, switch to
    # DOCUMENT_TEMPLATE_WITH_SOURCE and pass source=document.metadata.get("source", "")
    return DOCUMENT_TEMPLATE.format(content=document.page_content)


def filter_documents(documents: List[Document]) -> List[Document]:
    # drop very short chunks that are unlikely to carry useful context
    return [d for d in documents if len(d.page_content) > MIN_DOCUMENT_LENGTH]
def query_db(db: VectorStore, query: str) -> dict:
    index = VectorStoreIndexWrapper(vectorstore=db)
    llm = ChatOpenAI(temperature=0)
    # pull the most relevant chunks out of the vector store for this query
    retriever = index.vectorstore.as_retriever()
    documents = retriever.get_relevant_documents(query)
    documents = filter_documents(documents)
    # stuff the retrieved documents and the question into a single message
    chat_input = construct_prompt(documents, question=query)
    system_prompt = SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT)
    # todo: persist memory across calls (a fresh buffer is created for every query)
    memory = ConversationBufferMemory(return_messages=True)
    prompt = ChatPromptTemplate.from_messages(
        [
            system_prompt,
            MessagesPlaceholder(variable_name="history"),
            HumanMessagePromptTemplate.from_template("{input}"),
        ]
    )
    conversation = ConversationChain(memory=memory, prompt=prompt, llm=llm)
    response = conversation.predict(input=chat_input)
    # response = index.query_with_sources(query, llm=llm)
    # make the return value look like the old index API for now
    return {"answer": response}