-
-
Save lisakim0/0204d7504d17cefceaf2d37261c1b7d5 to your computer and use it in GitHub Desktop.
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS
from langchain_experimental.text_splitter import SemanticChunker
# Color palette interpolated into the custom CSS below.
primary_color = "#1E90FF"      # buttons and text-input border
secondary_color = "#FF6347"    # file-uploader button
background_color = "#F5F5F5"   # app background
text_color = "#4561e9"         # default text color

# Inject custom CSS. Literal CSS braces are doubled ({{ }}) because this is an
# f-string; unsafe_allow_html=True is required for Streamlit to emit the
# <style> tag instead of escaping it.
st.markdown(f"""
<style>
.stApp {{
background-color: {background_color};
color: {text_color};
}}
.stButton>button {{
background-color: {primary_color};
color: white;
border-radius: 5px;
border: none;
padding: 10px 20px;
font-size: 16px;
}}
.stTextInput>div>div>input {{
border: 2px solid {primary_color};
border-radius: 5px;
padding: 10px;
font-size: 16px;
}}
.stFileUploader>div>div>div>button {{
background-color: {secondary_color};
color: white;
border-radius: 5px;
border: none;
padding: 10px 20px;
font-size: 16px;
}}
</style>
""", unsafe_allow_html=True)
# Streamlit app title
st.title("Build a RAG System with DeepSeek R1 & Ollama")

# Prompt sent to the LLM; {context} is filled with the retrieved chunks and
# {question} with the user's query.
QA_PROMPT_TEXT = """
1. Use the following pieces of context to answer the question at the end.
2. If you don't know the answer, just say that "I don't know" but don't make up an answer on your own.\n
3. Keep the answer crisp and limited to 3,4 sentences.
Context: {context}
Question: {question}
Helpful Answer:"""


def _load_pdf_documents(pdf_bytes, source_name):
    """Parse PDF bytes into LangChain documents, one per page.

    PDFPlumberLoader needs a filesystem path, so the bytes are spilled to a
    uniquely named temporary file (instead of a shared ``temp.pdf`` in the
    working directory) which is removed once loading finishes.

    Args:
        pdf_bytes: Raw content of the uploaded PDF.
        source_name: Name recorded as each document's ``source`` metadata, so
            the prompt context shows the uploaded file's name rather than the
            temporary path.

    Returns:
        The list of documents produced by the loader.
    """
    # delete=False: the loader reopens the file by name, which fails on
    # Windows while the handle from NamedTemporaryFile is still open.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(pdf_bytes)
        tmp_path = tmp.name
    try:
        docs = PDFPlumberLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)
    for doc in docs:
        doc.metadata["source"] = source_name
    return docs


@st.cache_resource
def _build_retriever(pdf_bytes, source_name):
    """Chunk, embed and index a PDF; return a top-3 similarity retriever.

    Cached on the file's bytes and name so Streamlit reruns (one per question)
    reuse the existing index instead of re-embedding the whole document.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    docs = _load_pdf_documents(pdf_bytes, source_name)
    if not any(doc.page_content.strip() for doc in docs):
        raise ValueError("No extractable text was found in the uploaded PDF.")

    # One embedding model instance serves both the semantic chunker and the
    # vector store.
    embedder = HuggingFaceEmbeddings()
    chunks = SemanticChunker(embedder).split_documents(docs)
    vector = FAISS.from_documents(chunks, embedder)
    return vector.as_retriever(search_type="similarity", search_kwargs={"k": 3})


def _build_qa_chain(retriever):
    """Assemble the RetrievalQA chain (DeepSeek R1 via Ollama) over *retriever*."""
    llm_chain = LLMChain(
        llm=Ollama(model="deepseek-r1"),
        prompt=PromptTemplate.from_template(QA_PROMPT_TEXT),
        callbacks=None,
        verbose=True,
    )
    # How each retrieved chunk is rendered before being stuffed into {context}.
    document_prompt = PromptTemplate(
        input_variables=["page_content", "source"],
        template="Context:\ncontent:{page_content}\nsource:{source}",
    )
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=llm_chain,
        document_variable_name="context",
        document_prompt=document_prompt,
        callbacks=None,
    )
    return RetrievalQA(
        combine_documents_chain=combine_documents_chain,
        verbose=True,
        retriever=retriever,
        return_source_documents=True,
    )


# Load the PDF
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is not None:
    try:
        with st.spinner("Indexing PDF..."):
            retriever = _build_retriever(uploaded_file.getvalue(), uploaded_file.name)
    except ValueError as err:
        st.error(str(err))
        st.stop()

    qa = _build_qa_chain(retriever)

    # User input
    user_input = st.text_input("Ask a question related to the PDF :")

    # Process user input
    if user_input:
        with st.spinner("Processing..."):
            # "query" is RetrievalQA's input key; "result" holds the answer text.
            response = qa.invoke({"query": user_input})["result"]
        st.write("Response:")
        st.write(response)
else:
    st.write("Please upload a PDF file to proceed.")
Are you able to also publish the required packages with the exact version number please? Installing the latest ones will end in a conflict. Thank you
I solved it by installing the following modules:
pip install -U langchain langchain-community
pip install langchain
pip install langchain_experimental
pip install streamlit
pip install pdfplumber
pip install semantic-chunkers
pip install open-text-embeddings
pip install ollama
pip install prompt-template
pip install sentence-transformers
pip install faiss
pip install faiss-cpu
Could you suggest a script that saves the RAG training progress?
I’ve started using Poetry to create and manage my environment files.
https://github.com/henry3556108/rag
Can the RAG data be stored independently, for example in files or a database?
Are you able to also publish the required packages with the exact version number please? Installing the latest ones will end in a conflict. Thank you