Skip to content

Instantly share code, notes, and snippets.

@zudsniper
Forked from codelahoma/README.md
Created July 26, 2023 05:06
Show Gist options
  • Save zudsniper/610d1a256022634c93c7e8f7f41cf474 to your computer and use it in GitHub Desktop.
Save zudsniper/610d1a256022634c93c7e8f7f41cf474 to your computer and use it in GitHub Desktop.
Automatically generate summaries of YouTube videos with transcriptions, using OpenAI's language model and create Markdown files with the output.

YouTube Video Summarizer (yt_summarize.py)

This gist contains a Python script that generates a transcript or summary of a YouTube video. It fetches video information, transcribes the audio using the Whisper ASR model, and generates a summary using the OpenAI language model.

Features

  • Fetch YouTube video information (title, description, channel title, etc.)
  • Transcribe video audio
  • Generate a summary of the video transcript
  • Save output as a markdown file

Prerequisites

  • Python 3.6 or later
  • Google API Key
  • Whisper ASR Model
  • OpenAI API Key

Installation

  1. Download yt_summarize.py from this gist.
  2. Install the required packages by running:
pip install -r requirements.txt
  1. Set the necessary environment variables:
export GOOGLE_API_KEY="your_google_api_key"
export OPENAI_API_KEY="your_openai_api_key" 

Usage

Run the script with the following command:

python yt_summarize.py -u "https://www.youtube.com/watch?v=video_id" -t -s
  • -u or --url: The URL of the YouTube video to process (it will prompt you to input it if this is not present)
  • -t or --transcribe: Transcribe the video's audio.
  • -s or --summary: Generate a summary of the video.

If neither transcription or summary are requested, a markdown presentation of the video's metadata is output to stdout.

Files produced

If the -t flag is set, the audio will be saved to a unique file in ./audio/. Transcriptions are saved to the ./transcriptions/. These are to prevent the re-download and/or re-transcription if the same url is seen more than once by the script.

If the -s flag is set, a markdown formatted document with a playable embed of the video (if rendered as HTML), the video metadata, and a summary is created for your review in summaries, and is also output to stdout.

google-api-python-client==2.83.0
google-auth-httplib2==0.1.0
isodate==0.6.1
openai==0.27.2
openai-whisper==20230314
pydub==0.25.1
pytube==12.1.3
import argparse
import os
import re
import string
import warnings
import isodate
import whisper
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from langchain import OpenAI, PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from pydub import AudioSegment
from pytube import YouTube
# Your API key goes here
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
# set up prompts here for formatting
map_prompt_template = """
The following is the transcript of a video. Please provide a brief summary of the video, including the main points and key takeaways. Output should be as a markdown outline.
{text}
BRIEF SUMMARY IN MARKDOWN FORMAT:
"""
MAP_PROMPT = PromptTemplate(template=map_prompt_template, input_variables=["text"])
combine_prompt_template = """Here are a few markdown outlines of the video. Please combine them into a single outline.
{text}
COMBINED OUTLINE IN MARKDOWN FORMAT:
"""
COMBINE_PROMPT = PromptTemplate(
template=combine_prompt_template, input_variables=["text"]
)
def create_summary_filename(video_title, channel_title):
valid_chars = f"-_.() {string.ascii_letters}{string.digits}"
safe_video_title = (
"".join(c for c in video_title if c in valid_chars).strip().replace(" ", "_")
)
safe_channel_title = (
"".join(c for c in channel_title if c in valid_chars).strip().replace(" ", "_")
)
filename = f"summaries/{safe_channel_title}_{safe_video_title}.md"
return filename
# Get video ID from the URL
def get_video_id(url):
video_id = None
pattern = re.compile(
r"(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]+)"
)
match = pattern.match(url)
if match:
video_id = match.group(4)
return video_id
# Get video details using YouTube Data API v3
def get_video_details(video_id):
try:
youtube = build("youtube", "v3", developerKey=GOOGLE_API_KEY)
response = (
youtube.videos().list(part="snippet,contentDetails", id=video_id).execute()
)
return response["items"][0] if response["items"] else None
except HttpError as e:
print(f"An error occurred: {e}")
return None
# Convert ISO 8601 duration to a human-readable format
def format_duration(duration):
parsed_duration = isodate.parse_duration(duration)
total_seconds = int(parsed_duration.total_seconds())
hours, remainder = divmod(total_seconds, 3600)
minutes, seconds = divmod(remainder, 60)
return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
def generate_unique_filename(video_id, prefix, extension):
return f"{prefix}/{video_id}.{extension}"
def transcribe_audio(video_id, video_url):
# Create directories for audio_streams and transcriptions if they don't exist
os.makedirs("audio_streams", exist_ok=True)
os.makedirs("transcriptions", exist_ok=True)
transcription_filename = generate_unique_filename(video_id, "transcriptions", "txt")
if os.path.exists(transcription_filename):
with open(transcription_filename, "r") as transcription_file:
transcription = transcription_file.read()
else:
# Download the video as audio
yt = YouTube(video_url)
video = yt.streams.filter(only_audio=True).first()
audio_filename = generate_unique_filename(video_id, "audio_streams", "mp4")
file_name = video.download(filename=audio_filename)
# Convert the audio file to WAV format
audio = AudioSegment.from_file(file_name)
audio.export("audio.wav", format="wav")
# Load the Whisper ASR model
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
model = whisper.load_model("base")
# Transcribe the audio
result = model.transcribe("audio.wav")
transcription = result["text"]
# Save the transcription to a file
with open(transcription_filename, "w") as transcription_file:
transcription_file.write(transcription)
# Cleanup
os.remove("audio.wav")
return transcription
def split_text_to_documents(text, max_length=4096, overlap=100):
tokens = text.split()
text_chunks = []
current_chunk = []
current_length = 0
for token in tokens:
if current_length + len(token) + 1 > max_length - overlap:
text_chunks.append(" ".join(current_chunk))
current_chunk = current_chunk[-overlap:]
current_length = sum(len(t) + 1 for t in current_chunk)
current_chunk.append(token)
current_length += len(token) + 1
if current_chunk:
text_chunks.append(" ".join(current_chunk))
return [Document(page_content=t) for t in text_chunks]
# Main function
def main(args):
if args.url:
url = args.url
else:
url = input("Please enter a YouTube video URL: ")
video_id = get_video_id(url)
if not video_id:
print("Invalid YouTube URL")
return
embed_url = f"https://www.youtube.com/embed/{video_id}"
embed_code = f'<iframe width="560" height="315" src="{embed_url}" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
video_details = get_video_details(video_id)
if not video_details:
print("Could not fetch video details")
return
snippet = video_details["snippet"]
content_details = video_details["contentDetails"]
title = snippet["title"]
description = snippet["description"]
channel_title = snippet["channelTitle"]
length = format_duration(content_details["duration"])
published_at = snippet["publishedAt"]
markdown_block = f"""
{embed_code}
## {title}
**Channel**: {channel_title}
**Published**: {published_at}
**Length**: {length}
**Description**:
{description}
"""
print(markdown_block)
if args.transcribe:
transcription = transcribe_audio(video_id, url)
if args.summary:
llm = OpenAI(temperature=0)
# Split the transcription into smaller chunks as Documents
docs = split_text_to_documents(transcription)
# Choose a chain type for summarization
chain = load_summarize_chain(
llm,
chain_type="map_reduce",
map_prompt=MAP_PROMPT,
combine_prompt=COMBINE_PROMPT,
)
# Run the summarization chain
summary = chain.run(docs)
# Print or store the summary
print(summary)
output_filename = create_summary_filename(title, channel_title)
os.makedirs("summaries", exist_ok=True)
with open(output_filename, "w") as output_file:
output_file.write(markdown_block)
if args.transcribe:
transcription = transcribe_audio(video_id, url)
if args.summary:
output_file.write(f"\n\n{summary}")
print(f"Summary saved to {output_filename}")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Fetch YouTube video information and generate a markdown block"
)
parser.add_argument("-u", "--url", help="YouTube video URL")
parser.add_argument(
"-t", "--transcribe", action="store_true", help="Transcribe the video audio"
)
parser.add_argument(
"-s", "--summary", action="store_true", help="Summarize the video transcript"
)
args = parser.parse_args()
main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment