'''
Steps for building a RAG system with the LangChain framework:
1. Prepare the data (knowledge base).
2. Use a Loader object to load the data in the proper format.
3. Split the data into appropriately sized chunks.
4. Create the embeddings and the retriever.
4.1. Use an embedding model such as BAAI/bge-base-en-v1.5 from Hugging Face.
4.2. Create a vector database such as FAISS (Facebook AI Similarity Search). The retriever is an object derived from the DB, e.g. `retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})`.
5. Load the quantized model.
6. Set up the LLM chain.
6.1. Create a text-generation pipeline using the loaded model and its tokenizer.
6.2. Create a prompt template - it should follow the model's chat format, so if you swap the model checkpoint, make sure to use the appropriate formatting.
6.3. Combine the llm_chain with the retriever to create a RAG chain.
'''
import torch
from transformers import pipeline
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# Step 1: Creating the pdf file ✓
# Step 2: Loading the pdf file using the LangChain PDF loader ✓
loader = PyPDFLoader("./rag.pdf")
docs = loader.load()
print(f"Length of documents before chunking: {len(docs)}")
# Step 3: Create Chunks from the previously created documents ✓
splitter = RecursiveCharacterTextSplitter(chunk_size=128, chunk_overlap=30)
chunked_docs = splitter.split_documents(docs)
print(f"Length of documents after chunking: {len(chunked_docs)}")
# Step 4: Create Embeddings for the chunked documents
# 4.1. Load the BAAI/bge-base-en-v1.5 embedding model from Hugging Face and create the vector DB using the FAISS library ✓
db = FAISS.from_documents(chunked_docs, HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5"))
# 4.2. Derive the retriever from the vector DB ✓
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
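# Optional: query the retriever on its own (a hedged sketch, not part of the original gist).
# A LangChain retriever is a Runnable, so `invoke` returns the k most similar Document chunks.
# sample_chunks = retriever.invoke("What is this document about?")
# print([chunk.page_content[:80] for chunk in sample_chunks])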
# Step 5: Load quantized model (The LLM that will be used for text generation) ✓
model_name = "HuggingFaceH4/zephyr-7b-beta"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)
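# Note (assumption about the runtime environment): 4-bit loading via bitsandbytes expects a CUDA GPU.
# On a CPU-only machine you could drop `quantization_config` and load the model in full precision,
# at the cost of much higher memory usage.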
# Step 6: Setup the LLM chain
# 6.1. Create a text_generation pipeline using the loaded model and its tokenizer ✓
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=400,
)
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
# 6.2. Create a prompt template ✓
prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:
{context}
</s>
<|user|>
{question}
</s>
<|assistant|>
"""
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
llm_chain = prompt | llm | StrOutputParser()
# 6.3. Combine the llm_chain with the retriever to create a RAG chain ✓
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain
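# Optional variant (a sketch, not part of the original gist): keep the retrieved chunks alongside
# the generated answer so you can inspect which context was used. RunnableParallel runs the
# retriever and the passthrough in parallel, then `.assign` adds the answer from the llm_chain.
# from langchain_core.runnables import RunnableParallel
# rag_chain_with_sources = RunnableParallel(
#     {"context": retriever, "question": RunnablePassthrough()}
# ).assign(answer=llm_chain)
# result = rag_chain_with_sources.invoke("your question here")  # {"context": [...], "question": ..., "answer": ...}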
# Test ✓
question = "what is the author name?"
without_rag_result = llm_chain.invoke({"context": "", "question": question})
with_rag_result = rag_chain.invoke(question)
print(f"Without RAG:\t {without_rag_result}\nWith RAG:\t {with_rag_result}")