Created
May 13, 2023 13:04
-
-
Save ggarber/a2e6c68567a4c2abbbf488c37f4a99a3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install dependencies "pip install youtube_transcript_api langchain"
# Run with "OPENAI_API_KEY=xxxx python summarize.py <youtube_id>"
import sys

from youtube_transcript_api import YouTubeTranscriptApi
from langchain import OpenAI, PromptTemplate
from langchain.text_splitter import TokenTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
# Prompt used for the final "combine" step of the map_reduce chain.
PROMPT_TEMPLATE = """Summarize in 10 bullet points the following presentation:
{text}"""

# Token budget per transcript chunk and the overlap between adjacent chunks.
CHUNK_SIZE = 2000
CHUNK_OVERLAP = 100

# Upper bound on tokens generated by each completion call.
MAX_TOKENS = 1000

USAGE = 'Usage: OPENAI_API_KEY=xxxx python summarize.py <youtube_id>'


def transcript_to_text(entries):
    """Join the ``'text'`` field of every transcript entry with single spaces.

    Args:
        entries: Iterable of mappings, each holding a ``'text'`` key.

    Returns:
        The caption texts concatenated into one string; ``''`` when
        ``entries`` is empty.
    """
    return ' '.join(entry['text'] for entry in entries)


def summarize(video_id: str) -> str:
    """Fetch the transcript of ``video_id`` and return its bullet summary.

    The transcript is split into token-sized chunks that are run through a
    ``map_reduce`` summarize chain whose combine step uses PROMPT_TEMPLATE.

    Args:
        video_id: YouTube video identifier passed to the transcript API.

    Returns:
        The value produced by running the summarize chain over the chunks.
    """
    srt = YouTubeTranscriptApi.get_transcript(video_id)
    text = transcript_to_text(srt)

    llm = OpenAI(temperature=0, max_tokens=MAX_TOKENS)
    text_splitter = TokenTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    docs = text_splitter.create_documents([text])

    prompt = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=["text"])
    chain = load_summarize_chain(
        llm, chain_type="map_reduce", combine_prompt=prompt
    )
    # run langchain chain and hand the result back to the caller
    return chain.run(docs)


def main(argv=None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv`` when ``None``.

    Returns:
        ``0`` after printing the summary, ``2`` when the video id is missing.
    """
    argv = sys.argv if argv is None else argv
    # Without this check a missing argument surfaced as a bare IndexError.
    if len(argv) < 2:
        print(USAGE, file=sys.stderr)
        return 2
    print(summarize(argv[1]))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment