Created
May 17, 2023 17:53
-
-
Save johnpena/da89df8b20a305450ade27f23b0c3b07 to your computer and use it in GitHub Desktop.
summarize.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 153, | |
"outputs": [], | |
"source": [ | |
"from youtube_transcript_api import YouTubeTranscriptApi\n", | |
"\n", | |
"def get_transcript(video_id: str) -> dict:\n", | |
"    return YouTubeTranscriptApi.get_transcript(video_id)" | |
], | |
"metadata": { | |
"collapsed": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 154, | |
"outputs": [], | |
"source": [ | |
"recent_transcript = get_transcript(\"Tw238KYw8w4\")" | |
], | |
"metadata": { | |
"collapsed": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 155, | |
"outputs": [], | |
"source": [ | |
"def clean_transcript(transcript: dict) -> str:\n", | |
"    replacements = {\n", | |
"        \"Unknown: \": \"\",\n", | |
"        \"\\n\": \" \",\n", | |
"    }\n", | |
"\n", | |
"    doc = []\n", | |
"    for line in transcript:\n", | |
"        text = line['text']\n", | |
"        for (k, v) in replacements.items():\n", | |
"            text = text.replace(k, v)\n", | |
"        doc.append(text)\n", | |
"\n", | |
"    return \" \".join(doc)" | |
], | |
"metadata": { | |
"collapsed": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 156, | |
"outputs": [], | |
"source": [ | |
"openai_api_key = \"*\"" | |
], | |
"metadata": { | |
"collapsed": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 157, | |
"outputs": [], | |
"source": [ | |
"from langchain import OpenAI, PromptTemplate, LLMChain\n", | |
"from langchain.chat_models import ChatOpenAI\n", | |
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n", | |
"from langchain.chains.mapreduce import MapReduceChain\n", | |
"from langchain.prompts import PromptTemplate\n", | |
"\n", | |
"llm = ChatOpenAI(\n", | |
" temperature=0,\n", | |
" openai_api_key=openai_api_key,\n", | |
" max_tokens=1000,\n", | |
")\n", | |
"\n", | |
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=0, separators=[\" \", \",\"])" | |
], | |
"metadata": { | |
"collapsed": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 158, | |
"outputs": [], | |
"source": [ | |
"texts = text_splitter.split_text(clean_transcript(recent_transcript))" | |
], | |
"metadata": { | |
"collapsed": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 159, | |
"outputs": [], | |
"source": [ | |
"from langchain.docstore.document import Document\n", | |
"\n", | |
"docs = [Document(page_content=t) for t in texts[:3]]" | |
], | |
"metadata": { | |
"collapsed": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 160, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Shikou Fang is a passionate and committed educational professional with a master's from Oxford University and a PhD from the University of California Berkeley. She has taught preschool and college freshmen reading, writing, literature, and research. She is running because there are many students who do not feel seen and heard, there is a mental health crisis in our school, and in order for a child to thrive, not just survive in school, they must feel supported and safe.\n", | |
"\n", | |
"Alyssa Dorfman has a Bachelor of Science from Cornell University and has lived in Chappaqua with her husband and daughters for 16 years. She has extensive fiscal experience from her previous career in retail financial planning. She is the only candidate to have sent a child all the way through the schools and has 16 years of institutional knowledge about the schools. She feels the biggest concerns in the district right now are the safety and support of our children both physically and emotionally, teacher retention, and program growth while remaining fiscally responsible.\n", | |
"\n", | |
"Barack Lom successfully worked with a group of parents to advocate for the expansion of before and after school care on-site programs in all of the elementary schools. He was a persistent advocate who showed up to the board meetings to represent their group. He believes strongly in working together to achieve excellence and hopes to continue doing that in the district.\n", | |
"\n", | |
"Matthew Ourback did not give an opening statement.\n" | |
] | |
} | |
], | |
"source": [ | |
"from langchain.chains.summarize import load_summarize_chain\n", | |
"\n", | |
"prompt_template = \"\"\"\n", | |
"The following is a transcript of a school board meeting.\n", | |
"The transcript contains four school board candidates discussing their qualifications for becoming members of the school board.\n", | |
"For each school board candidate, write a summary of their discussed qualifications.\n", | |
"For each candidate, the summary should contain at least 3 sentences, and no more than 5 sentences.\n", | |
"The summary should contain one sentence about their background, including their current job title, role, and company of employment if applicable.\n", | |
"The summary should contain one sentence stating the reasons why they want to be on the school board.\n", | |
"Include a summary even if the candidate does not give an opening statement.\n", | |
"\n", | |
"Here is the transcript:\n", | |
"\n", | |
"{text}\n", | |
"\"\"\"\n", | |
"\n", | |
"prompt = PromptTemplate(template=prompt_template, input_variables=[\"text\"])\n", | |
"\n", | |
"chain = load_summarize_chain(llm, chain_type=\"stuff\", prompt=prompt)\n", | |
"summary = chain.run(docs)\n", | |
"\n", | |
"print(summary)" | |
], | |
"metadata": { | |
"collapsed": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 160, | |
"outputs": [], | |
"source": [], | |
"metadata": { | |
"collapsed": false | |
} | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment