Created
March 6, 2024 20:25
-
-
Save mikesparr/5833b2df9c9ed5db4ccd34b8cd4d31bf to your computer and use it in GitHub Desktop.
Experiment using Langchain, OpenAI and Streamlit, along with FAISS for CPU vector store, that analyzes YouTube transcripts and answers questions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# reference: https://www.youtube.com/watch?v=lG7Uxts9SXs (some code was deprecated so fix along way)

# Project scaffolding: dedicated folder for the assistant.
mkdir youtube-assistant
cd youtube-assistant

# Isolated Python environment so dependencies don't leak into the system install.
python3 -m venv .venv
source .venv/bin/activate

# All runtime dependencies: LangChain (+ community/OpenAI splits), the
# transcript fetcher, the Streamlit UI, and FAISS for the CPU vector store.
pip3 install -U python-dotenv langchain langchain-community langchain-openai openai youtube-transcript-api streamlit faiss-cpu

# Secrets live in .env (loaded by python-dotenv); never commit this file.
echo "OPENAI_API_KEY=\"sk-YOURKEYHERE\"" > .env

# Smoke-test the helper module on its own first...
python3 langchain_helper.py # prints object ID (hoping for no errors, warnings expected)

# ...then launch the full chat UI.
streamlit run main.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): `from langchain import PromptTemplate` is a legacy import path;
# newer releases expose it as `langchain_core.prompts.PromptTemplate` — confirm
# against the pinned langchain version before changing.
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAI
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv

# Pull OPENAI_API_KEY (and anything else) from the local .env file into the
# process environment; the OpenAI clients below read it implicitly.
load_dotenv()

# Single shared embeddings client, created once at import time and reused by
# every vector-store build in this module.
embeddings = OpenAIEmbeddings()
def create_vector_db_from_youtube_url(
    video_url: str,
    chunk_size: int = 1000,
    chunk_overlap: int = 100,
) -> FAISS:
    """Build an in-memory FAISS vector store from a YouTube video's transcript.

    Fetches the transcript for ``video_url``, splits it into overlapping
    chunks, embeds each chunk with the module-level OpenAI embeddings client,
    and indexes the result in FAISS.

    Args:
        video_url: URL of the YouTube video to transcribe.
        chunk_size: Maximum characters per transcript chunk (default 1000,
            matching the original hard-coded value).
        chunk_overlap: Characters of overlap between consecutive chunks so
            sentences split at a boundary remain searchable (default 100).

    Returns:
        A FAISS vector store containing the embedded transcript chunks.
    """
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    # Overlapping chunks keep context that straddles a chunk boundary.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = text_splitter.split_documents(transcript)

    # Embedding happens here (one OpenAI API call batch per chunk set).
    return FAISS.from_documents(docs, embeddings)
def get_response_from_query(db, query, k=4):
    """Answer ``query`` using the ``k`` most relevant transcript chunks in ``db``.

    Args:
        db: FAISS vector store built by ``create_vector_db_from_youtube_url``.
        query: Natural-language question about the video.
        k: Number of transcript chunks to retrieve (default 4; with ~1000-char
           chunks this stays within gpt-3.5-turbo-instruct's ~4097-token limit).

    Returns:
        A ``(response, docs)`` tuple: the model's answer as a single-line
        string, and the retrieved source documents for inspection.
    """
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join([d.page_content for d in docs])

    # text-davinci-003 is deprecated; gpt-3.5-turbo-instruct is its
    # replacement completion model: https://platform.openai.com/docs/deprecations
    llm = OpenAI(model="gpt-3.5-turbo-instruct")

    prompt = PromptTemplate(
        input_variables=["question", "docs"],
        template="""
        You are a helpful YouTube assistant that can answer questions about videos based on the video's transcript.
        Answer the following question: {question}
        By searching the following video transcript: {docs}
        Only use the factual information from the transcript to answer the question.
        If you feel like you don't have enough information to answer the question, say "I don't know".
        Your answers should be detailed.
        """,
    )

    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(question=query, docs=docs_page_content)

    # BUG FIX: the original `response.replace("\n", "")` glued words together
    # across line breaks ("...end.Start..."). Collapse all whitespace runs
    # (including newlines) to single spaces instead.
    response = " ".join(response.split())
    return response, docs
if __name__ == "__main__":
    # Standalone smoke test: build a vector store for a known video and show
    # the resulting object (its repr is enough to confirm creation worked).
    print("Testing ...")
    test_url = "https://youtu.be/-Osca2Zax4Y?si=iyOiePxzUy_bUayO"
    vector_db = create_vector_db_from_youtube_url(test_url)
    print(vector_db)  # just prints object ID to confirm it created it
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import langchain_helper as lch
import streamlit as st
import textwrap

st.title("YouTube Assistant")

# Input form lives in the sidebar; a form batches the two text areas so the
# app only reruns the expensive work when the user presses Submit.
with st.sidebar:
    with st.form(key='my_form'):
        # FIX: inside `with st.sidebar:` the plain `st.*` calls already target
        # the sidebar — the original `st.sidebar.text_area` applied the
        # sidebar container twice and bypassed the enclosing form.
        youtube_url = st.text_area(
            label="What is the YouTube video URL?",
            max_chars=50
        )
        query = st.text_area(
            label="Ask me about the video?",
            max_chars=50,
            key="query"
        )
        submit_button = st.form_submit_button(label="Submit")

if query and youtube_url:
    # NOTE(review): the vector store is rebuilt on every rerun; consider
    # wrapping the build in st.cache_resource keyed on the URL.
    db = lch.create_vector_db_from_youtube_url(youtube_url)
    response, docs = lch.get_response_from_query(db, query)
    st.subheader("Answer:")
    st.text(textwrap.fill(response, width=80))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Generative AI (GenAI) YouTube Video Helper
This example, from the second part of a LangChain tutorial, teaches you how to transcribe text from YouTube videos, embed and store the chunked text in a vector store using FAISS, and ask questions about the video that are answered using only the transcribed text.
Result
Valid query
Invalid query
Test with a different video, on Kong API Gateways