-
-
Save MichelNivard/e8fd2efae5114a189f40d9eb6ea18dc2 to your computer and use it in GitHub Desktop.
import tkinter | |
import customtkinter | |
from bs4 import BeautifulSoup | |
# Langchain loads: | |
from langchain.document_loaders import DirectoryLoader,PagedPDFSplitter | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS, Qdrant | |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain | |
from langchain.llms import OpenAI | |
from langchain.prompts import PromptTemplate | |
# import filedialog module | |
from tkinter import filedialog | |
import os  # stdlib; used only for the API-key lookup below

# Global look and feel of the customtkinter widgets.
customtkinter.set_appearance_mode("System")  # Modes: system (default), light, dark
customtkinter.set_default_color_theme("blue")  # Themes: blue (default), dark-blue, green

app = customtkinter.CTk()  # create CTk window like you do with the Tk window
app.geometry("1080x960")

# Prefer the OPENAI_API_KEY environment variable so the secret does not have to
# be written into this file. The placeholder literal stays as the fallback, so
# replacing it in place keeps working when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR API GOES HERE!")

# Embedding model shared by the indexing and search callbacks.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
# Function for opening the
# file explorer window
def browseFiles():
    """Index a user-selected folder of PDFs and save the result to "faiss_index".

    Opens a directory chooser, loads and splits every ``*.pdf`` directly inside
    the chosen folder, embeds the resulting chunks with the module-level
    ``embeddings`` object and writes the FAISS index to the ``faiss_index``
    folder relative to the current working directory.

    Does nothing when the dialog is cancelled, and shows a warning instead of
    building an index when the folder yields no documents.
    """
    from tkinter import messagebox  # local import: only needed for the warning

    folder = filedialog.askdirectory()
    if not folder:
        # The dialog returns an empty value when the user cancels it.
        return

    loader = DirectoryLoader(folder, glob="*.pdf")
    pdfs = loader.load_and_split()
    if not pdfs:
        # There is nothing to embed, so do not try to build or save an index.
        messagebox.showwarning(
            "Index files",
            f"No PDF documents could be loaded from:\n{folder}",
        )
        return

    faiss_index = FAISS.from_documents(pdfs, embeddings)
    faiss_index.save_local("faiss_index")
# Function for selecting an
# existing vectorstore index
def browseDocStore():
    """Make a previously saved FAISS index the one used for searching.

    Opens a directory chooser (starting at ``/``), loads the FAISS index stored
    in the chosen folder with the module-level ``embeddings`` object, and saves
    it to the ``faiss_index`` folder, which is where the search callback reads
    its index from. Any index already stored in ``faiss_index`` is overwritten.

    Does nothing when the dialog is cancelled.
    """
    index_dir = filedialog.askdirectory(initialdir="/", title="Select a Vectorstore Index")
    if not index_dir:
        # The dialog returns an empty value when the user cancels it.
        return

    faiss_index = FAISS.load_local(index_dir, embeddings)
    # Previously the loaded index was bound to a local and thrown away, so the
    # button had no effect. Saving it under "faiss_index" makes it the active
    # index for the next search.
    faiss_index.save_local("faiss_index")
# Search Button
def button_function():
    """Answer the query in the entry field and display the answer and sources.

    Clears all output textboxes, reads the question from ``entry``, loads the
    index saved in ``faiss_index``, retrieves the 5 most similar chunks and
    runs a map-reduce question-answering chain over them. The final answer is
    written to ``textbox``; the per-chunk summaries and the matching source
    documents are written to the four "Source and Summary" tabs (only the
    first four of the retrieved chunks have a tab).

    An empty question, a missing/unreadable index, or an empty search result is
    reported in ``textbox`` instead of raising.
    """
    summary_boxes = (textbox1, textbox2, textbox3, textbox4)
    source_boxes = (textbox1b, textbox2b, textbox3b, textbox4b)

    # clear previous search
    for box in (textbox, *summary_boxes, *source_boxes):
        box.delete("0.0", "end")

    # get question:
    query = entry.get()
    if not query.strip():
        textbox.insert("0.0", "Please enter a question first.")
        return

    try:
        faiss_index = FAISS.load_local("faiss_index", embeddings)
    except RuntimeError as err:
        # faiss raises RuntimeError when faiss_index/index.faiss cannot be
        # opened, e.g. before any files have been indexed.
        textbox.insert(
            "0.0",
            'Could not load the index from "faiss_index". '
            f'Click "Index files" first.\n\n{err}',
        )
        return

    docs = faiss_index.similarity_search(query=query, k=5)
    if not docs:
        textbox.insert("0.0", "The index returned no documents for this question.")
        return

    # set up prompt:
    question_prompt_template = """Summarize the following text in 250 tokens or less:
{context}"""
    QUESTION_PROMPT = PromptTemplate(
        template=question_prompt_template, input_variables=["context"]
    )
    combine_prompt_template = """Given the following extracted parts of a number of long documents and a question, create a final answer.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
QUESTION: {question}
=========
{summaries}
=========
FINAL ANSWER:"""
    COMBINE_PROMPT = PromptTemplate(
        template=combine_prompt_template, input_variables=["summaries", "question"]
    )
    chain = load_qa_with_sources_chain(
        OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY),
        chain_type="map_reduce",
        return_intermediate_steps=True,
        question_prompt=QUESTION_PROMPT,
        combine_prompt=COMBINE_PROMPT,
    )

    # get answer and display the sources:
    answer = chain({"input_documents": docs, "question": query}, return_only_outputs=False)
    textbox.insert("0.0", answer["output_text"])

    # zip() stops at the shortest sequence, so an index that returns fewer than
    # four chunks fills only the tabs it has data for instead of raising
    # IndexError.
    tab_rows = zip(
        summary_boxes,
        source_boxes,
        answer["intermediate_steps"],
        answer["input_documents"],
    )
    for summary_box, source_box, summary, document in tab_rows:
        summary_box.insert("0.0", summary)
        # The document object is shown through its string form, as before.
        source_box.insert("0.0", str(document))
# Widgets that are positioned with place() are not pack()ed first: a widget is
# handled by one geometry manager at a time and the later call wins, so the
# earlier pack() calls had no lasting effect.

# search field
entry = customtkinter.CTkEntry(master=app, placeholder_text="query", width=600)
entry.place(relx=0.5, rely=0.03, anchor=tkinter.CENTER)

# output textbox
textbox = customtkinter.CTkTextbox(master=app, width=750, height=150)
textbox.place(relx=0.57, rely=0.15, anchor=tkinter.CENTER)

# one tab per displayed source
tabview = customtkinter.CTkTabview(master=app, width=750)
tabview.place(relx=0.5, rely=0.6, anchor=tkinter.CENTER)
tabview.add("Source and Summary 1")
tabview.add("Source and Summary 2")
tabview.add("Source and Summary 3")
tabview.add("Source and Summary 4")


def _add_tab_boxes(tab_name):
    """Create and pack the summary (top) and source (bottom) textbox of a tab."""
    tab = tabview.tab(tab_name)
    summary_box = customtkinter.CTkTextbox(tab, width=950, height=150)
    summary_box.pack(pady=20, padx=10)
    source_box = customtkinter.CTkTextbox(tab, width=950, height=350)
    source_box.pack(pady=20, padx=10)
    return summary_box, source_box


# Sources textboxes (names are used by the search callback)
textbox1, textbox1b = _add_tab_boxes("Source and Summary 1")
textbox2, textbox2b = _add_tab_boxes("Source and Summary 2")
textbox3, textbox3b = _add_tab_boxes("Source and Summary 3")
textbox4, textbox4b = _add_tab_boxes("Source and Summary 4")

# Browse for a folder of documents:
index_button = customtkinter.CTkButton(master=app, text="Index files", command=browseFiles)
index_button.place(relx=0.1, rely=0.1, anchor=tkinter.CENTER)

# Browse for an existing faiss store:
button = customtkinter.CTkButton(master=app, text="Load existing Index", command=browseDocStore)
button.place(relx=0.1, rely=0.05, anchor=tkinter.CENTER)

# define search button:
button2 = customtkinter.CTkButton(master=app, text="Search", command=button_function)
button2.place(relx=0.9, rely=0.03, anchor=tkinter.CENTER)

app.mainloop()
I have now also added:
pip install unstructured
I now get the following error;
... query = entry.get()
TclError: invalid command name ".!ctkentry.!entry"
faiss_index = FAISS.load_local("faiss_index" , embeddings)
RuntimeError: Error in __cdecl faiss::FileIOReader::FileIOReader(const char *) at D:\a\faiss-wheels\faiss-wheels\faiss\faiss\impl\io.cpp:68: Error: 'f' failed: could not open faiss_index\index.faiss for reading: No such file or directory
docs = faiss_index.similarity_search(query=query, k=5)
NameError: name 'faiss_index' is not defined
Sorry for your trouble! Now you click “Index files” and select a folder of files to index. When they are done, you can search the files with ChatGPT!
Sorry to bother you. I am not able to get it to work. I still get the following error:
... # get question:
... query = entry.get()
NameError: name 'entry' is not defined
faiss_index = FAISS.load_local("faiss_index" , embeddings)
RuntimeError: Error in __cdecl faiss::FileIOReader::FileIOReader(const char *) at D:\a\faiss-wheels\faiss-wheels\faiss\faiss\impl\io.cpp:68: Error: 'f' failed: could not open faiss_index\index.faiss for reading: No such file or directory
docs = faiss_index.similarity_search(query=query, k=5)
NameError: name 'faiss_index' is not defined
Would it be possible to start with something much smaller? For example, open one PDF file and get it to provide the key points as a bulleted summary? Thanks for the consideration.
I also added in:
pip install faiss-cpu