Skip to content

Instantly share code, notes, and snippets.

@kkdai
Last active July 11, 2023 13:47
Show Gist options
  • Save kkdai/2885f74ebad6eeb2103edd3b2c4bf65e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
# %%
from dotenv import load_dotenv
import os
import pickle
# Load environment variables from the .env file so the OpenAI API key is
# available before any langchain components are constructed.
load_dotenv()
# Bug fix: the original read "OPEN_API_KEY" into API_KEY but never used the
# value — langchain's OpenAI/OpenAIEmbeddings classes read the
# OPENAI_API_KEY environment variable. Accept either spelling from .env and
# export the canonical name so the rest of the script actually authenticates.
API_KEY = os.environ.get("OPENAI_API_KEY") or os.environ.get("OPEN_API_KEY")
if API_KEY:
    os.environ["OPENAI_API_KEY"] = API_KEY
# %%
from langchain.document_loaders import PyPDFLoader # for loading the pdf
from langchain.embeddings import OpenAIEmbeddings # for creating embeddings
from langchain.vectorstores import Chroma # for the vectorization part
from langchain.chains import ChatVectorDBChain # for chatting with the pdf
from langchain.llms import OpenAI # the LLM model we'll use (ChatGPT)
# %%
# 1. Load the source PDF and split it into one document per page.
pdf_path = "The-AI-Act.pdf"
pdf_loader = PyPDFLoader(pdf_path)
pages = pdf_loader.load_and_split()
# %%
# Notebook-cell expression: displays how many page documents were produced.
len(pages)
# %%
# 2. Embed every page with OpenAI embeddings and store the vectors in a
#    local Chroma database persisted to the current working directory.
embedder = OpenAIEmbeddings()
vectordb = Chroma.from_documents(
    pages,
    embedding=embedder,
    persist_directory=".",
)
vectordb.persist()
# %%
# Sanity-check retrieval before wiring up the chain: a plain similarity
# search restricted (via metadata filter) to page 1 of the PDF...
query = "What is the bitcoin?"
vectordb.similarity_search(query, k=2, filter={"page": 1})
# %%
# ...and a maximal-marginal-relevance search, which trades a little
# similarity for more diverse results (fetch 3 candidates, keep 2).
vectordb.max_marginal_relevance_search(query, k=2, fetch_k=3)
# %%
# 3. Querying: build a conversational retrieval chain over the vector store.
# NOTE(review): ChatVectorDBChain is deprecated in newer langchain releases
# in favour of ConversationalRetrievalChain — kept here since this file
# pins the older import. Likewise "gpt-3.5-turbo" is a chat model; newer
# langchain versions expect ChatOpenAI rather than the completion-style
# OpenAI wrapper — confirm against the installed langchain version.
llm = OpenAI(temperature=0.9, model_name="gpt-3.5-turbo")
pdf_qa = ChatVectorDBChain.from_llm(
    llm, vectordb, return_source_documents=True)
query = "What is the bitcoin?"
# Bug fix: chat_history must be a list of (question, answer) tuples; the
# original passed an empty string, which only worked by accident because
# iterating "" yields nothing.
result = pdf_qa({"question": query, "chat_history": []})
print("Answer:")
print(result["answer"])
# Carry the first exchange forward so the follow-up question has context.
chat_history = [(query, result["answer"])]
query2 = "When it be found?"
result = pdf_qa({"question": query2, "chat_history": chat_history})
print("Answer2:")
print(result["answer"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment