Created
May 11, 2023 22:39
-
-
Save hwchase17/8bb41c048a6facb881634fc55c54a55d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from langchain.document_loaders import YoutubeLoader | |
from langchain.indexes import VectorstoreIndexCreator | |
# (YouTube URL, human-readable title) pairs for the webinars to index.
# The title is what later gets stored on each document as metadata["name"].
urls = [
    ("https://www.youtube.com/watch?v=fP6vRNkNEt0", "Prompt Injection"),
    ("https://www.youtube.com/watch?v=qWv2vyOX0tk", "Low Code-No Code"),
    ("https://www.youtube.com/watch?v=k8GNCCs16F4", "Agents In Production"),
    ("https://www.youtube.com/watch?v=1gRlCjy18m4", "Agents"),
    ("https://www.youtube.com/watch?v=fLn-WqliEQU", "Output Parsing"),
    ("https://www.youtube.com/watch?v=ywT-5yKDtDg", "Document QA"),
    ("https://www.youtube.com/watch?v=GrCFyyyAxCU", "SQL"),
    ("https://www.youtube.com/watch?v=AKsfHK_4tf4", "Chat Documents with JS"),
]
# Load every video listed in `urls` and tag each resulting document with the
# video's title, so the title is available as a metadata field downstream.
docs = []
for url, title in urls:
    # NOTE(review): add_video_info=False presumably skips the extra
    # video-details lookup and loads the transcript only -- confirm against
    # the YoutubeLoader documentation.
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    _docs = loader.load()
    for d in _docs:
        # "name" is the metadata key the rest of the script filters on.
        d.metadata["name"] = title
        docs.append(d)
from langchain.schema import Document | |
from langchain.embeddings import CohereEmbeddings | |
from langchain.vectorstores import Chroma | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.chains import RetrievalQA | |
# Split the loaded documents into chunks and index them in a Chroma vector
# store using Cohere embeddings.
# NOTE(review): CohereEmbeddings() is constructed with no arguments, so it
# presumably picks up its API key from the environment -- confirm.
embeddings = CohereEmbeddings()

# Rebinds `docs` to the list of chunks (chunk_size=500); the unsplit documents
# are no longer referenced after this line.
docs = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(docs)

vectorstore = Chroma.from_documents(
    docs,
    embeddings,
)
from langchain.llms import OpenAI | |
# Baseline: question answering over the plain vector-store retriever, with no
# metadata filtering. The same question is asked of the self-query retriever
# further down.
llm = OpenAI(temperature=0)
vectorstore_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
)
# The result of run() is neither stored nor printed here.
vectorstore_chain.run("what did they say about prompt injection in the agents in production webinar?")
from langchain.retrievers.self_query.base import SelfQueryRetriever | |
from langchain.chains.query_constructor.base import AttributeInfo | |
# Self-query retrieval: describe the metadata field to the LLM so that the
# retriever can be built with knowledge of the "name" attribute stored on
# each document.
metadata_field_info = [
    AttributeInfo(
        name="name",
        # The allowed titles are rendered into the description from `urls`.
        description=f"The name of the video, should be one of: {[t for _, t in urls]}",
        type="string or list[string]",
    ),
]
document_content_description = "excerpts from langchain webinars"

retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    verbose=True,
)
chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
# Same question as the baseline chain; the result of run() is neither stored
# nor printed here.
chain.run("what did they say about prompt injection in the agents in production webinar?")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment