Skip to content

Instantly share code, notes, and snippets.

@kkdai
Last active July 11, 2023 13:47
Show Gist options
  • Save kkdai/2885f74ebad6eeb2103edd3b2c4bf65e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
# %%
from dotenv import load_dotenv
import os
import pickle
# Load environment variables from the .env file so the OpenAI API key is
# available before any langchain components are constructed.
load_dotenv()
# Bug fix: the original read "OPEN_API_KEY" into API_KEY but never used the
# value — langchain's OpenAI/OpenAIEmbeddings classes read the
# OPENAI_API_KEY environment variable. Accept either spelling from .env and
# export the canonical name so the rest of the script actually authenticates.
API_KEY = os.environ.get("OPENAI_API_KEY") or os.environ.get("OPEN_API_KEY")
if API_KEY:
    os.environ["OPENAI_API_KEY"] = API_KEY
# %%
from langchain.document_loaders import PyPDFLoader # for loading the pdf
from langchain.embeddings import OpenAIEmbeddings # for creating embeddings
from langchain.vectorstores import Chroma # for the vectorization part
from langchain.chains import ChatVectorDBChain # for chatting with the pdf
from langchain.llms import OpenAI # the LLM model we'll use (ChatGPT)
# %%
# 1. Load the source PDF and split it into one document per page.
pdf_path = "The-AI-Act.pdf"
pdf_loader = PyPDFLoader(pdf_path)
pages = pdf_loader.load_and_split()
# %%
# Notebook-cell expression: displays how many page documents were produced.
len(pages)
# %%
# 2. Embed every page with OpenAI embeddings and store the vectors in a
#    local Chroma database persisted to the current working directory.
embedder = OpenAIEmbeddings()
vectordb = Chroma.from_documents(
    pages,
    embedding=embedder,
    persist_directory=".",
)
vectordb.persist()
# %%
# Sanity-check retrieval before wiring up the chain: a plain similarity
# search restricted (via metadata filter) to page 1 of the PDF...
query = "What is the bitcoin?"
vectordb.similarity_search(query, k=2, filter={"page": 1})
# %%
# ...and a maximal-marginal-relevance search, which trades a little
# similarity for more diverse results (fetch 3 candidates, keep 2).
vectordb.max_marginal_relevance_search(query, k=2, fetch_k=3)
# %%
# 3. Querying: build a conversational retrieval chain over the vector store.
# NOTE(review): ChatVectorDBChain is deprecated in newer langchain releases
# in favour of ConversationalRetrievalChain — kept here since this file
# pins the older import. Likewise "gpt-3.5-turbo" is a chat model; newer
# langchain versions expect ChatOpenAI rather than the completion-style
# OpenAI wrapper — confirm against the installed langchain version.
llm = OpenAI(temperature=0.9, model_name="gpt-3.5-turbo")
pdf_qa = ChatVectorDBChain.from_llm(
    llm, vectordb, return_source_documents=True)
query = "What is the bitcoin?"
# Bug fix: chat_history must be a list of (question, answer) tuples; the
# original passed an empty string, which only worked by accident because
# iterating "" yields nothing.
result = pdf_qa({"question": query, "chat_history": []})
print("Answer:")
print(result["answer"])
# Carry the first exchange forward so the follow-up question has context.
chat_history = [(query, result["answer"])]
query2 = "When it be found?"
result = pdf_qa({"question": query2, "chat_history": chat_history})
print("Answer2:")
print(result["answer"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment