Created August 26, 2023 09:31
-
-
Save youtube-jocoding/0b96f01710f166b52f171022376f19ec to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Minimal retrieval-QA demo: load a PDF, split it into small chunks, embed the
# chunks into a Chroma vector store, and answer one question with RetrievalQA.

from dotenv import load_dotenv

# Load variables from a local .env file before the rest of the script runs
# (presumably this is where OPENAI_API_KEY comes from -- confirm).
load_dotenv()

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever  # not used below
from langchain.chains import RetrievalQA

# Loader
# Use load() rather than load_and_split(): the explicit splitter configured
# below should be the only chunking pass applied to the documents.
loader = PyPDFLoader("unsu.pdf")
pages = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=300,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
texts = text_splitter.split_documents(pages)

# Embedding
embeddings_model = OpenAIEmbeddings()

# Load it into Chroma
db = Chroma.from_documents(texts, embeddings_model)

# Question (Korean: "What food does the wife want to eat?")
question = "아내가 먹고 싶어하는 음식은 무엇이야?"
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
qa_chain = RetrievalQA.from_chain_type(llm, retriever=db.as_retriever())

# The chain is called with a dict holding the "query" key; the whole result
# object is printed as-is.
result = qa_chain({"query": question})
print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment