Created September 4, 2023 06:53
Save roylez/f780cc41e62f81ac6b0e65a10b4c892e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# LangChain question-answering (RAG) demo.
# Reference: https://python.langchain.com/docs/use_cases/question_answering/
#
# Dependencies:
#   pip install chromadb langchain openai tiktoken
#   pip install unstructured markdown   # needed by the Markdown directory loader
#   pip install sqlite-vss              # needed by the SQLiteVSS vector store

# Pull OPENAI_API_KEY (and any other settings) into the environment
# from a local .env file before any LangChain component needs them.
import dotenv

dotenv.load_dotenv()
# --- Basic integration: one-liner index over a single web page -------------
from langchain.document_loaders import WebBaseLoader
from langchain.indexes import VectorstoreIndexCreator

# `loader` is reused further below to feed the manual split/embed pipeline.
loader = WebBaseLoader("https://juju.is/docs/dev/agent-introspection")
web_index = VectorstoreIndexCreator().from_loaders([loader])
print(web_index.query("which version of juju defines `juju_leaves`"))
# --- Load, split, embed, and persist documents -----------------------------
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
# Swap the vector-store backend here if desired:
# from langchain.vectorstores import Chroma as VSS
from langchain.vectorstores import SQLiteVSS as VSS

# Re-fetch the page via the loader defined above and chunk it into
# 500-character pieces with no overlap.
raw_docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
web_splits = splitter.split_documents(raw_docs)

# Additional corpus: every Markdown file under the current directory.
from langchain.document_loaders import DirectoryLoader

md_loader = DirectoryLoader(".", glob="**/*.md")
md_splits = md_loader.load_and_split()
# print(md_splits)

# Embed every chunk with OpenAI and store the vectors in SQLiteVSS.
vectorstore = VSS.from_documents(
    documents=web_splits + md_splits,
    embedding=OpenAIEmbeddings(),
)
# --- Retrieval: raw vector similarity search (manual example) --------------
# question = "which version of juju defines juju_unit_status"
# docs = vectorstore.similarity_search(question)

# --- Answer generation; LLMChain could be another choice which retains context
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# temperature=0 keeps answers deterministic for this Q&A use case.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
qa_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())
def query(question):
    """Run *question* through the RetrievalQA chain; print and return the raw result."""
    response = qa_chain({"query": question})
    print(response["result"])
    return response


query("what is the name of roy's pet")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.