# Forked from hwchase17/langchain-youtube-selfquery.py
# Created December 5, 2023 16:07
-
-
Save Cdaprod/1d01c6be10b7fab1c5bc16273d78cbe1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Loader used below to turn each YouTube URL into LangChain documents.
from langchain.document_loaders import YoutubeLoader
# NOTE(review): VectorstoreIndexCreator is not referenced in the visible
# script; the import is kept in case other code depends on it.
from langchain.indexes import VectorstoreIndexCreator
# (video URL, webinar title) pairs. The title is attached to every document
# loaded from that video and is later offered to the self-query retriever as
# the set of allowed values for the "name" metadata field.
urls = [
    ("https://www.youtube.com/watch?v=fP6vRNkNEt0", "Prompt Injection"),
    ("https://www.youtube.com/watch?v=qWv2vyOX0tk", "Low Code-No Code"),
    ("https://www.youtube.com/watch?v=k8GNCCs16F4", "Agents In Production"),
    ("https://www.youtube.com/watch?v=1gRlCjy18m4", "Agents"),
    ("https://www.youtube.com/watch?v=fLn-WqliEQU", "Output Parsing"),
    ("https://www.youtube.com/watch?v=ywT-5yKDtDg", "Document QA"),
    ("https://www.youtube.com/watch?v=GrCFyyyAxCU", "SQL"),
    ("https://www.youtube.com/watch?v=AKsfHK_4tf4", "Chat Documents with JS"),
]
# Load every video in `urls` and collect the resulting documents in `docs`,
# tagging each one with its webinar title under the "name" metadata key so
# the title can be used as a filter later on.
docs = []
for url, title in urls:
    # add_video_info=False: only the loader's default output is requested,
    # no extra video metadata.
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    _docs = loader.load()
    for d in _docs:
        d.metadata["name"] = title
        docs.append(d)
from langchain.schema import Document
from langchain.embeddings import CohereEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

# No credentials are passed explicitly.
# NOTE(review): presumably the Cohere API key is read from the environment -
# confirm before running.
embeddings = CohereEmbeddings()

# Re-bind `docs` to the chunked documents (chunk_size=500) so that the vector
# store indexes small excerpts rather than whole loaded documents.
docs = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(docs)

# Embed the chunks and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(docs, embeddings)
from langchain.llms import OpenAI

# temperature=0 is passed to the OpenAI LLM wrapper; the same `llm` object is
# reused by every chain built from it.
llm = OpenAI(temperature=0)

# Baseline: question answering over the plain vector-store retriever, with no
# metadata filtering.
vectorstore_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())

# The answer is printed because a bare expression is silently discarded when
# this file runs as a script.
print(vectorstore_chain.run("what did they say about prompt injection in the agents in production webinar?"))
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

# Describe the metadata the retriever may filter on. The only field is
# "name", and its description lists the webinar titles taken from `urls`.
metadata_field_info = [
    AttributeInfo(
        name="name",
        description=f"The name of the video, should be one of: {[t for _, t in urls]}",
        type="string or list[string]",
    ),
]
document_content_description = "excerpts from langchain webinars"

# Self-query retriever built from the LLM, the vector store and the metadata
# description above; verbose=True is passed through to from_llm.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    verbose=True,
)
chain = RetrievalQA.from_chain_type(llm, retriever=retriever)

# Same question as the baseline chain. The answer is printed because a bare
# expression is silently discarded when this file runs as a script.
print(chain.run("what did they say about prompt injection in the agents in production webinar?"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment