Cdaprod · December 5, 2023 16:07
diff --git a/langchain-youtube-selfquery.py b/langchain-youtube-selfquery.py
 from langchain.document_loaders import YoutubeLoader
 from langchain.indexes import VectorstoreIndexCreator

 # (video URL, webinar title) pairs. The title is copied onto every document
 # loaded from that URL as metadata["name"].
 urls = [
     ("https://www.youtube.com/watch?v=fP6vRNkNEt0", "Prompt Injection"),
     ("https://www.youtube.com/watch?v=qWv2vyOX0tk", "Low Code-No Code"),
     ("https://www.youtube.com/watch?v=k8GNCCs16F4", "Agents In Production"),
     ("https://www.youtube.com/watch?v=1gRlCjy18m4", "Agents"),
     ("https://www.youtube.com/watch?v=fLn-WqliEQU", "Output Parsing"),
     ("https://www.youtube.com/watch?v=ywT-5yKDtDg", "Document QA"),
     ("https://www.youtube.com/watch?v=GrCFyyyAxCU", "SQL"),
     ("https://www.youtube.com/watch?v=AKsfHK_4tf4", "Chat Documents with JS"),
 ]

 # Load the documents for each video, tag them with the webinar title, and
 # collect them all (in URL order) into one flat list.
 docs = []
 for video_url, video_title in urls:
     video_docs = YoutubeLoader.from_youtube_url(
         video_url, add_video_info=False
     ).load()
     for video_doc in video_docs:
         video_doc.metadata["name"] = video_title
     docs.extend(video_docs)
        
 from langchain.schema import Document
 from langchain.embeddings import CohereEmbeddings
 from langchain.vectorstores import Chroma
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains import RetrievalQA
 from langchain.llms import OpenAI

 # Baseline: question answering over a plain vector-store retriever, with no
 # filtering on the "name" metadata attached to each document.
 embeddings = CohereEmbeddings()

 # Re-split the loaded documents (chunk_size=500) before indexing them.
 docs = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(docs)

 vectorstore = Chroma.from_documents(docs, embeddings)

 llm = OpenAI(temperature=0)

 vectorstore_chain = RetrievalQA.from_chain_type(
     llm, retriever=vectorstore.as_retriever()
 )

 # A bare expression is only echoed in a notebook/REPL; run as a script the
 # answer would be silently dropped, so print it explicitly.
 print(
     vectorstore_chain.run(
         "what did they say about prompt injection in the agents in production webinar?"
     )
 )

 from langchain.retrievers.self_query.base import SelfQueryRetriever
 from langchain.chains.query_constructor.base import AttributeInfo

 # Describe the single metadata field ("name") that was attached to every
 # document, listing the webinar titles from `urls` as its allowed values.
 metadata_field_info = [
     AttributeInfo(
         name="name",
         description=f"The name of the video, should be one of: {[t for _, t in urls]}",
         type="string or list[string]",
     ),
 ]
 document_content_description = "excerpts from langchain webinars"

 # Self-query retriever built from the same llm and vectorstore, plus the
 # content description and field description above.
 retriever = SelfQueryRetriever.from_llm(
     llm,
     vectorstore,
     document_content_description,
     metadata_field_info,
     verbose=True,
 )

 chain = RetrievalQA.from_chain_type(llm, retriever=retriever)

 # A bare expression is only echoed in a notebook/REPL; run as a script the
 # answer would be silently dropped, so print it explicitly.
 print(
     chain.run(
         "what did they say about prompt injection in the agents in production webinar?"
     )
 )
	from langchain.document_loaders import YoutubeLoader
	from langchain.indexes import VectorstoreIndexCreator

	# (video URL, webinar title) pairs. The title is copied onto every document
	# loaded from that URL as metadata["name"].
	urls = [
		("https://www.youtube.com/watch?v=fP6vRNkNEt0", "Prompt Injection"),
		("https://www.youtube.com/watch?v=qWv2vyOX0tk", "Low Code-No Code"),
		("https://www.youtube.com/watch?v=k8GNCCs16F4", "Agents In Production"),
		("https://www.youtube.com/watch?v=1gRlCjy18m4", "Agents"),
		("https://www.youtube.com/watch?v=fLn-WqliEQU", "Output Parsing"),
		("https://www.youtube.com/watch?v=ywT-5yKDtDg", "Document QA"),
		("https://www.youtube.com/watch?v=GrCFyyyAxCU", "SQL"),
		("https://www.youtube.com/watch?v=AKsfHK_4tf4", "Chat Documents with JS"),
	]

	# Load the documents for each video and tag each one with its webinar title.
	# The loop bodies must be nested under their `for` headers; with the nesting
	# flattened Python rejects the file with an IndentationError.
	docs = []
	for url, title in urls:
		loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
		_docs = loader.load()
		for d in _docs:
			d.metadata["name"] = title
			docs.append(d)

	from langchain.schema import Document
	from langchain.embeddings import CohereEmbeddings
	from langchain.vectorstores import Chroma
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.chains import RetrievalQA
	from langchain.llms import OpenAI

	# Baseline: question answering over a plain vector-store retriever, with no
	# filtering on the "name" metadata attached to each document.
	embeddings = CohereEmbeddings()

	# Re-split the loaded documents (chunk_size=500) before indexing them.
	docs = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(docs)

	vectorstore = Chroma.from_documents(docs, embeddings)

	llm = OpenAI(temperature=0)

	vectorstore_chain = RetrievalQA.from_chain_type(
		llm, retriever=vectorstore.as_retriever()
	)

	# A bare expression is only echoed in a notebook/REPL; run as a script the
	# answer would be silently dropped, so print it explicitly.
	print(
		vectorstore_chain.run(
			"what did they say about prompt injection in the agents in production webinar?"
		)
	)

	from langchain.retrievers.self_query.base import SelfQueryRetriever
	from langchain.chains.query_constructor.base import AttributeInfo

	# Describe the single metadata field ("name") that was attached to every
	# document, listing the webinar titles from `urls` as its allowed values.
	metadata_field_info = [
		AttributeInfo(
			name="name",
			description=f"The name of the video, should be one of: {[t for _, t in urls]}",
			type="string or list[string]",
		),
	]
	document_content_description = "excerpts from langchain webinars"

	# Self-query retriever built from the same llm and vectorstore, plus the
	# content description and field description above.
	retriever = SelfQueryRetriever.from_llm(
		llm,
		vectorstore,
		document_content_description,
		metadata_field_info,
		verbose=True,
	)

	chain = RetrievalQA.from_chain_type(llm, retriever=retriever)

	# A bare expression is only echoed in a notebook/REPL; run as a script the
	# answer would be silently dropped, so print it explicitly.
	print(
		chain.run(
			"what did they say about prompt injection in the agents in production webinar?"
		)
	)