'''
Steps for building a RAG system with the LangChain framework
1. Prepare the data (knowledge base)
2. Use a Loader object to load the data in the proper format
3. Chunk the data into appropriately sized pieces
4. Create the embeddings and the retriever
   4.1. Use an embedding model such as BAAI/bge-base-en-v1.5 from Hugging Face
   4.2. Create a vector database such as FAISS (Facebook AI Similarity Search). The retriever is an object
        derived from the vector store, e.g. `retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})`
5. Load a quantized model
6. Set up the LLM chain
   6.1. Create a text-generation pipeline using the loaded model and its tokenizer
   6.2. Create a prompt template - this should follow the format of the model, so if you substitute
        the model checkpoint, make sure to use the appropriate formatting
   6.3. Combine the llm_chain with the retriever to create a RAG chain
'''
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
# Step 1: Creating the pdf file ✓
# Step 2: Loading the pdf file using the LangChain PDF loader ✓
loader = PyPDFLoader("./rag.pdf")
docs = loader.load()
print(f"Number of documents before chunking: {len(docs)}")

# Step 3: Create chunks from the previously loaded documents ✓
splitter = RecursiveCharacterTextSplitter(chunk_size=128, chunk_overlap=30)
chunked_docs = splitter.split_documents(docs)
print(f"Number of chunks after chunking: {len(chunked_docs)}")
# Step 4: Create embeddings for the chunked documents
# 4.1. Load the BAAI/bge-base-en-v1.5 embedding model from Hugging Face and create the vector DB using the FAISS library ✓
db = FAISS.from_documents(chunked_docs, HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5"))

# 4.2. Derive the retriever from the vector DB ✓
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
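# Optional sanity check (an assumption, not part of the original steps): run a sample
# similarity query and print what would later be injected into the prompt as context.
# In recent LangChain versions a retriever is a Runnable, so .invoke() works here.
sample_hits = retriever.invoke("What is this document about?")
for i, hit in enumerate(sample_hits):
    print(f"Hit {i}: {hit.page_content[:80]!r}")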
# Step 5: Load the quantized model (the LLM that will be used for text generation) ✓
model_name = "HuggingFaceH4/zephyr-7b-beta"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)
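# Note: 4-bit loading through bitsandbytes requires a CUDA-capable GPU. Depending on the
# transformers version, you may also want to pass device_map="auto" to from_pretrained
# so the quantized weights are placed on the GPU automatically.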
# Step 6: Set up the LLM chain
# 6.1. Create a text-generation pipeline using the loaded model and its tokenizer ✓
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=400,
)
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
# 6.2. Create a prompt template ✓
prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:
{context}
</s>
<|user|>
{question}
</s>
<|assistant|>
"""
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
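# Alternative (a sketch, not used below): the same zephyr-formatted prompt string can be
# derived from the model's own chat template, which keeps the special tags correct if you
# swap the checkpoint. The {context}/{question} placeholders are left for PromptTemplate to fill.
chat_derived_template = tokenizer.apply_chat_template(
    [
        {"role": "system", "content": "Answer the question based on your knowledge. Use the following context to help:\n{context}"},
        {"role": "user", "content": "{question}"},
    ],
    tokenize=False,
    add_generation_prompt=True,
)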
llm_chain = prompt | llm | StrOutputParser()

# 6.3. Combine the llm_chain with the retriever to create a RAG chain ✓
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain
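# The dict above feeds the raw Document list into {context}, which gets rendered via str()
# including metadata. A common refinement (an optional variant, not part of the original gist)
# is to join only the page contents so the prompt stays clean:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain_clean = {"context": retriever | format_docs, "question": RunnablePassthrough()} | llm_chain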
# Test ✓
question = "What is the author's name?"
without_rag_result = llm_chain.invoke({"context": "", "question": question})
with_rag_result = rag_chain.invoke(question)
print(f"Without RAG:\t {without_rag_result}\nWith RAG:\t {with_rag_result}")