Last active
August 16, 2024 12:00
-
-
Save ranfysvalle02/86f4b89cfe1d33bbc0a76fb28858aba3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from youtube_transcript_api import YouTubeTranscriptApi | |
from duckduckgo_search import DDGS | |
from openai import AzureOpenAI | |
# Azure OpenAI connection settings -- replace with your actual values.
AZURE_OPENAI_ENDPOINT = "https://DEMO.openai.azure.com"
AZURE_OPENAI_API_KEY = ""
# Name of your model deployment; passed as `model` to the chat-completions call.
deployment_name = "gpt-4-32k"
client = AzureOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    api_version="2023-07-01-preview",
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
)

# Optional presets -- replace with your actual values if desired.
# VIDEO_IDS: YouTube video IDs to summarize; when empty, the script searches for videos.
# CONCEPT: topic to learn about; when empty, the script prompts for it.
VIDEO_IDS = []
CONCEPT = ""
def extract_youtube_id_from_href(href_url):
    """Extract the YouTube video ID from a video URL.

    Recognized URL shapes:
      * ``https://www.youtube.com/watch?v=<id>`` (extra query parameters such
        as ``&t=30s`` or ``&list=...`` are ignored)
      * ``https://youtu.be/<id>``
      * ``https://www.youtube.com/embed/<id>``, ``/shorts/<id>``,
        ``/live/<id>`` and ``/v/<id>``

    Args:
        href_url: The URL (string) to extract the video ID from.

    Returns:
        The video ID as a string. If the URL matches none of the shapes
        above, the text after the last ``=`` is returned (the whole string
        when it contains no ``=``), which is what this function historically
        returned for every input.
    """
    # Imported locally so the function is self-contained.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(href_url)
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    # Standard watch URL: the ID is the value of the `v` query parameter,
    # regardless of where it appears in the query string.
    v_values = parse_qs(parsed.query).get("v")
    if v_values:
        return v_values[0]

    # Short links carry the ID as the first path segment.
    if host == "youtu.be" and path_parts:
        return path_parts[0]

    # Path-based URLs on YouTube hosts: /embed/<id>, /shorts/<id>, ...
    is_youtube_host = host in ("youtube.com", "youtube-nocookie.com") or host.endswith(
        (".youtube.com", ".youtube-nocookie.com")
    )
    if (
        is_youtube_host
        and len(path_parts) >= 2
        and path_parts[0] in ("embed", "shorts", "live", "v")
    ):
        return path_parts[1]

    # Unrecognized shape: keep the legacy behaviour instead of failing.
    return href_url.split("=")[-1]
def get_transcript(video_id):
    """Return the full transcript text of a YouTube video as a single string.

    Args:
        video_id: The ID of the YouTube video.

    Returns:
        The ``text`` field of every transcript segment joined with single
        spaces, or None if fetching or assembling the transcript raised.
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(segment["text"] for segment in segments)
    except Exception as err:
        # Best effort: report the failure so the caller can skip this video.
        print(f"Error fetching transcript for {video_id}: {err}")
    return None
## START THE SHOW
# Flow: ask for a topic, find YouTube videos about it, collect their
# transcripts, then ask the model to turn them into a set of notes.
if not CONCEPT:
    CONCEPT = input("What would you like to learn about? ")

if not VIDEO_IDS:
    # No preset videos: search DuckDuckGo for YouTube results on the topic.
    query = CONCEPT + " site:youtube.com"
    results = DDGS().text(query, region="us-en", max_results=5)
    VIDEO_IDS.extend(extract_youtube_id_from_href(result["href"]) for result in results)

# Wrap each transcript in [video_id:...] markers so the model can tell the
# videos apart. dict.fromkeys drops duplicate IDs while keeping their order,
# so the same video is not fetched and sent twice.
transcript_blocks = []
for video_id in dict.fromkeys(VIDEO_IDS):
    vidtxt = get_transcript(video_id)
    if vidtxt:
        transcript_blocks.append(
            "[video_id:" + video_id + "]\n" + vidtxt + "\n[end video_id:" + video_id + "]\n"
        )
all_videos_str = "".join(transcript_blocks)

# Without any transcript text the model would have nothing to summarize, so
# stop here instead of spending an API call on an empty prompt.
if not all_videos_str:
    raise SystemExit(f"No transcripts could be fetched for {CONCEPT!r}; nothing to summarize.")

messages = [
    {"role": "system", "content": "You are a helpful assistant that summarizes multiple video transcripts into a comprehensive set of detailed notes."},
    {"role": "user", "content": "I'm trying to learn about " + CONCEPT},
    {"role": "user", "content": "This is all the video transcript text I found online: " + all_videos_str},
    {"role": "user", "content": "Give me a comprehensive set of notes. Minimum of 15000 characters. Think critically and step by step."},
]
# Echo the full prompt so the user can see exactly what is sent to the model.
print(messages)
ai_msg = client.chat.completions.create(
    model=deployment_name,
    messages=messages,
)
print("--------------------------")
print(ai_msg.choices[0].message.content)
Author
ranfysvalle02
commented
Aug 10, 2024
- "What do you want to learn about?" (await user input)
- (use the input to search the web for YouTube videos on that topic)
- extract each video's transcript
- summarize all the video transcripts
- build a comprehensive list of notes on the topic
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment