Created
February 16, 2023 20:56
-
-
Save MichelNivard/e8fd2efae5114a189f40d9eb6ea18dc2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import tkinter
# import filedialog module
from tkinter import filedialog

import customtkinter
from bs4 import BeautifulSoup
# Langchain loads:
from langchain.document_loaders import DirectoryLoader,PagedPDFSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS, Qdrant
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
# Application-wide setup: theme, main window, and the OpenAI embedding client
# shared by the indexing and search callbacks.
customtkinter.set_appearance_mode("System")  # Modes: system (default), light, dark
customtkinter.set_default_color_theme("blue")  # Themes: blue (default), dark-blue, green

app = customtkinter.CTk()  # create CTk window like you do with the Tk window
app.geometry("1080x960")

# Prefer the OPENAI_API_KEY environment variable so the secret does not have
# to be written into this file; the placeholder is kept as the fallback so
# editing the literal in place still works as before.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR API GOES HERE!")
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
# Function for opening the | |
# file explorer window | |
# Directory that "Index files" writes to and that a search falls back on.
_DEFAULT_INDEX_DIR = "faiss_index"

# FAISS index the next search will use.  ``None`` means "nothing chosen in this
# session yet; load _DEFAULT_INDEX_DIR from disk on first search".
_active_index = None


def _show_status(message):
    """Replace the contents of the main answer box with *message*."""
    textbox.delete("0.0", "end")
    textbox.insert("0.0", message)


def _get_active_index():
    """Return the index to search, loading the on-disk default if needed.

    Raises whatever ``FAISS.load_local`` raises when the default index
    directory does not exist or cannot be read.
    """
    global _active_index
    if _active_index is None:
        _active_index = FAISS.load_local(_DEFAULT_INDEX_DIR, embeddings)
    return _active_index


def browseFiles():
    """Ask for a folder of PDFs, index them, and make that index active.

    The PDFs matching ``*.pdf`` in the chosen folder are loaded and split,
    embedded into a FAISS index, and the index is saved to
    ``faiss_index`` in the current working directory.  Nothing happens when
    the dialog is dismissed without a selection; a folder with no loadable
    PDFs is reported in the answer box instead of being indexed.
    """
    global _active_index
    folder = filedialog.askdirectory()
    if not folder:
        # Dialog cancelled: there is no directory to scan.
        return

    loader = DirectoryLoader(folder, glob="*.pdf")
    pdfs = loader.load_and_split()
    if not pdfs:
        # Do not attempt to build (and overwrite the saved index with) an
        # index made from zero documents.
        _show_status(f"No PDF content found in {folder}; nothing was indexed.")
        return

    faiss_index = FAISS.from_documents(pdfs, embeddings)
    faiss_index.save_local(_DEFAULT_INDEX_DIR)
    # Search the freshly built index from now on, even if another index had
    # been loaded earlier in this session.
    _active_index = faiss_index


def browseDocStore():
    """Ask for a saved FAISS index directory and make it the active index.

    The original implementation loaded the index into a local variable and
    dropped it, so the selection never influenced a search.  The loaded index
    is now kept in module state and used by ``button_function``.
    """
    global _active_index
    faiss_file = filedialog.askdirectory(initialdir="/", title="Select a Vectorstore Index")
    if not faiss_file:
        # Dialog cancelled: keep whatever index was active before.
        return

    try:
        _active_index = FAISS.load_local(faiss_file, embeddings)
    except (RuntimeError, OSError) as err:
        # faiss reports an unreadable/missing index file as RuntimeError.
        _show_status(f"Could not load an index from {faiss_file}:\n{err}")


# Search Button
def button_function():
    """Answer the query in the entry field and show the answer with sources.

    Retrieves the five most similar chunks from the active index, summarises
    each one, combines the summaries into a final answer (map-reduce QA
    chain), then fills the answer box and the per-source tabs.
    """
    summary_boxes = (textbox1, textbox2, textbox3, textbox4)
    source_boxes = (textbox1b, textbox2b, textbox3b, textbox4b)

    # clear previous search
    for box in (textbox, *summary_boxes, *source_boxes):
        box.delete("0.0", "end")

    # get question:
    query = entry.get()
    if not query.strip():
        textbox.insert("0.0", "Please enter a question first.")
        return

    try:
        faiss_index = _get_active_index()
    except (RuntimeError, OSError) as err:
        # No index has been built or loaded yet (or it is unreadable).
        textbox.insert(
            "0.0",
            "No usable index found. Click \"Index files\" or "
            f"\"Load existing Index\" first.\n\n{err}",
        )
        return

    docs = faiss_index.similarity_search(query=query, k=5)

    # set up prompt:
    question_prompt_template = """Summarize the following text in 250 tokens or less:
{context}"""
    QUESTION_PROMPT = PromptTemplate(
        template=question_prompt_template, input_variables=["context"]
    )
    combine_prompt_template = """Given the following extracted parts of a number of long documents and a question, create a final answer.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
QUESTION: {question}
=========
{summaries}
=========
FINAL ANSWER:"""
    COMBINE_PROMPT = PromptTemplate(
        template=combine_prompt_template, input_variables=["summaries", "question"]
    )
    chain = load_qa_with_sources_chain(
        OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY),
        chain_type="map_reduce",
        return_intermediate_steps=True,
        question_prompt=QUESTION_PROMPT,
        combine_prompt=COMBINE_PROMPT,
    )

    # get answer and display the sources:
    answer = chain({"input_documents": docs, "question": query}, return_only_outputs=False)
    textbox.insert("0.0", answer["output_text"])

    # Five chunks are retrieved but only four tabs exist, so the fifth chunk
    # feeds the final answer without being displayed.  zip() also stops early
    # when the index returns fewer than four chunks, instead of raising
    # IndexError as the fixed [0]..[3] indexing did.
    shown = zip(
        summary_boxes,
        source_boxes,
        answer["intermediate_steps"],
        answer["input_documents"],
    )
    for summary_box, source_box, summary, source in shown:
        summary_box.insert("0.0", summary)
        source_box.insert("0.0", source)
# Build the widget tree and start the event loop.  The top-level widgets are
# positioned with place() only: the original called pack() and then place()
# on the same widget, and since a widget has a single geometry manager the
# later place() call was the one that took effect.

# search field
entry = customtkinter.CTkEntry(master=app, placeholder_text="query", width=600)
entry.place(relx=0.5, rely=0.03, anchor=tkinter.CENTER)

# output textbox (final answer)
textbox = customtkinter.CTkTextbox(master=app, width=750, height=150)
textbox.place(relx=0.57, rely=0.15, anchor=tkinter.CENTER)

# one tab per displayed source
tabview = customtkinter.CTkTabview(master=app, width=750)
tabview.place(relx=0.5, rely=0.6, anchor=tkinter.CENTER)

_TAB_TITLES = (
    "Source and Summary 1",
    "Source and Summary 2",
    "Source and Summary 3",
    "Source and Summary 4",
)
for _title in _TAB_TITLES:
    tabview.add(_title)


def _make_tab_boxes(title):
    """Create the summary box and the source box inside the tab *title*.

    Returns the pair ``(summary_box, source_box)``; the summary box is packed
    first so it sits above the taller source box.
    """
    tab = tabview.tab(title)
    summary_box = customtkinter.CTkTextbox(tab, width=950, height=150)
    summary_box.pack(pady=20, padx=10)
    source_box = customtkinter.CTkTextbox(tab, width=950, height=350)
    source_box.pack(pady=20, padx=10)
    return summary_box, source_box


# Sources textboxes (names are referenced by the search callback)
textbox1, textbox1b = _make_tab_boxes(_TAB_TITLES[0])
textbox2, textbox2b = _make_tab_boxes(_TAB_TITLES[1])
textbox3, textbox3b = _make_tab_boxes(_TAB_TITLES[2])
textbox4, textbox4b = _make_tab_boxes(_TAB_TITLES[3])

# Browse for folder of documents:
button = customtkinter.CTkButton(master=app, text="Index files", command=browseFiles)
button.place(relx=0.1, rely=0.1, anchor=tkinter.CENTER)

# Browse for an existing faiss store:
button = customtkinter.CTkButton(master=app, text="Load existing Index", command=browseDocStore)
button.place(relx=0.1, rely=0.05, anchor=tkinter.CENTER)

# define search button:
button2 = customtkinter.CTkButton(master=app, text="Search", command=button_function)
button2.place(relx=0.9, rely=0.03, anchor=tkinter.CENTER)

app.mainloop()
Sorry to bother you. I am not able to get it to work. I still get the following error:
... # get question:
... query = entry.get()
NameError: name 'entry' is not defined
faiss_index = FAISS.load_local("faiss_index" , embeddings)
RuntimeError: Error in __cdecl faiss::FileIOReader::FileIOReader(const char *) at D:\a\faiss-wheels\faiss-wheels\faiss\faiss\impl\io.cpp:68: Error: 'f' failed: could not open faiss_index\index.faiss for reading: No such file or directory
docs = faiss_index.similarity_search(query=query, k=5)
NameError: name 'faiss_index' is not defined
Would it be possible to start with something much smaller? For example, open one PDF file and have it provide the key points as a bulleted summary. Thanks for the consideration.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sorry for your trouble! Now you click “Index files” and select a folder of files to index. When they are done, you can search the files with ChatGPT!