Retrieval Augmented Generation (RAG)
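A minimal Retrieval Augmented Generation pipeline using LangChain: the script loads a FAQ text file, splits it into chunks, embeds the chunks with a SentenceTransformer model into a Chroma vector store, then answers a user question with ChatOpenAI, grounding the reply in the retrieved context.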
import os

from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.prompts.chat import (
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

load_dotenv()

# https://python.langchain.com/docs/modules/data_connection/vectorstores/
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
LANGUAGE_MODEL = "gpt-3.5-turbo"  # chat model; "gpt-3.5-turbo-instruct" is a completion model

template: str = """You are a customer support specialist.
You assist users with general inquiries and technical issues.
Answer the question: {question} based on {context} only.
If you do not know the answer, redirect the user to [email protected].
"""
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_message_prompt = HumanMessagePromptTemplate.from_template("{question}")
chat_prompt_template = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)
model = ChatOpenAI(model=LANGUAGE_MODEL, openai_api_key=OPENAI_API_KEY)


def format_docs(docs):
    """Concatenate retrieved documents into a single context string."""
    return "\n\n".join(doc.page_content for doc in docs)


def load_documents():
    """Load a file from path and split it into chunks."""
    loader = TextLoader("./docs/faq.txt")
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return text_splitter.split_documents(documents)


def load_embeddings(documents):
    """Embed each chunk and load it into a Chroma vector store."""
    # create the open-source embedding function
    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    # load the chunks into Chroma and expose the store as a retriever
    db = Chroma.from_documents(documents, embedding_function)
    return db.as_retriever()


def generate_response(retriever, query):
    """Generate a response from a retriever and a query."""
    # Pipe the retrieved (and formatted) context plus the raw question into the
    # chat prompt, run the model, and parse the output to a plain string.
    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | chat_prompt_template
        | model
        | StrOutputParser()
    )
    return chain.invoke(query)


def query(question):
    """Answer a question end to end: load, index, retrieve, generate."""
    documents = load_documents()
    print(f"Loaded {len(documents)} documents.")
    retriever = load_embeddings(documents)
    return generate_response(retriever, question)


response = query("Do you ship to Europe?")
print(response)
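# Assumed setup (not stated in the gist): a .env file with OPENAI_API_KEY, a
# ./docs/faq.txt file to index, and the packages langchain, openai,
# python-dotenv, sentence-transformers, and chromadb installed.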