ranfysvalle02 · August 16, 2024 12:00 · ranfysvalle02 · Aug 10, 2024
diff --git a/research-yt.py b/research-yt.py
 from youtube_transcript_api import YouTubeTranscriptApi
 from duckduckgo_search import DDGS
 from openai import AzureOpenAI

 # Azure OpenAI connection settings -- replace with your actual values.
 AZURE_OPENAI_ENDPOINT = "https://DEMO.openai.azure.com"
 AZURE_OPENAI_API_KEY = ""
 deployment_name = "gpt-4-32k"  # name of the model deployment to call
 client = AzureOpenAI(
     azure_endpoint=AZURE_OPENAI_ENDPOINT,
     api_version="2023-07-01-preview",
     api_key=AZURE_OPENAI_API_KEY,
 )
 # Optional presets: when VIDEO_IDS is empty the script searches for videos,
 # and when CONCEPT is empty it asks for a topic on the command line.
 VIDEO_IDS = []
 CONCEPT = ""
 def extract_youtube_id_from_href(href_url):
     """Return the YouTube video ID contained in a URL.

     Recognised URL shapes:

     * ``.../watch?v=<id>`` -- the ``v`` query parameter is used, whatever
       other parameters (``t``, ``list``, ...) surround it.
     * ``youtu.be/<id>`` short links.
     * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>``
       paths on a YouTube host.

     Args:
         href_url: URL string, e.g. the ``href`` of a search result.

     Returns:
         The video ID as a string. If none of the shapes above match, the
         text after the last ``=`` is returned (the whole string when there
         is no ``=``), which is what this function historically returned
         for every input.
     """
     from urllib.parse import parse_qs, urlparse

     parsed = urlparse(href_url)

     # Splitting on '=' and taking the last piece breaks as soon as another
     # parameter follows ``v`` (``watch?v=ID&t=42s`` would yield ``42s``), so
     # read the ``v`` parameter explicitly instead.
     v_values = parse_qs(parsed.query).get("v")
     if v_values:
         return v_values[0]

     host = (parsed.hostname or "").lower()
     segments = [part for part in parsed.path.split("/") if part]
     if host == "youtu.be":
         # Short links carry the ID as the first path segment.
         if segments:
             return segments[0]
     elif host.endswith(("youtube.com", "youtube-nocookie.com")):
         if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
             return segments[1]

     # Unknown shape: keep the legacy behaviour so the result is still a str.
     return href_url.split("=")[-1]
 def get_transcript(video_id):
     """Fetch a video's transcript and flatten it into one string.

     Args:
       video_id: The ID of the YouTube video.

     Returns:
       The ``text`` of every transcript segment joined with single spaces,
       or None if fetching or joining raised any exception (the error is
       printed, not re-raised).
     """
     try:
         segments = YouTubeTranscriptApi.get_transcript(video_id)
         return ' '.join(segment['text'] for segment in segments)
     except Exception as err:
         # Best effort: report the problem and let the caller skip this video.
         print(f"Error fetching transcript for {video_id}: {err}")
     return None

 ## START THE SHOW
 # Ask for a topic interactively only when none was preset.
 if CONCEPT == "":
     CONCEPT = input("What would you like to learn about? ")

 # With no preset videos, search DuckDuckGo (max_results=5) for the topic
 # restricted to youtube.com and derive a video ID from each hit's href.
 if not VIDEO_IDS:
     query = CONCEPT + " site:youtube.com"
     results = DDGS().text(str(query), region="us-en", max_results=5)
     VIDEO_IDS.extend(extract_youtube_id_from_href(hit["href"]) for hit in results)

 # Wrap every available transcript in [video_id:...] markers; videos whose
 # transcript came back empty or None are skipped.
 transcript_blocks = []
 for video_id in VIDEO_IDS:
     vidtxt = get_transcript(video_id)
     if not vidtxt:
         continue
     transcript_blocks.append(
         "[video_id:" + video_id + "]\n" + str(vidtxt) + "\n[end video_id:" + video_id + "]\n"
     )
 all_videos_str = "".join(transcript_blocks)

 messages = [
     {
         "role": "system",
         "content": "You are a helpful assistant that summarizes multiple video transcripts into a comprehensive set of detailed notes.",
     },
     {
         "role": "user",
         "content": "I'm trying to learn about " + CONCEPT,
     },
     {
         "role": "user",
         "content": "This is all the video transcript text I found online: " + all_videos_str,
     },
     {
         "role": "user",
         "content": "Give me a comprehensive set of notes. Minimum of 15000 characters. Think critically and step by step.",
     },
 ]
 print(messages)

 # Send the whole conversation to the configured deployment in one request.
 completion = client.chat.completions.create(model=deployment_name, messages=messages)
 ai_msg = completion
 print("--------------------------")
 print(ai_msg.choices[0].message.content)
	from youtube_transcript_api import YouTubeTranscriptApi
	from duckduckgo_search import DDGS
	from openai import AzureOpenAI

	# Azure OpenAI connection settings -- replace with your actual values.
	AZURE_OPENAI_ENDPOINT = "https://DEMO.openai.azure.com"
	AZURE_OPENAI_API_KEY = ""
	deployment_name = "gpt-4-32k"  # name of the model deployment to call
	client = AzureOpenAI(
	    azure_endpoint=AZURE_OPENAI_ENDPOINT,
	    api_version="2023-07-01-preview",
	    api_key=AZURE_OPENAI_API_KEY,
	)
	# Optional presets: when VIDEO_IDS is empty the script searches for videos,
	# and when CONCEPT is empty it asks for a topic on the command line.
	VIDEO_IDS = []
	CONCEPT = ""
	def extract_youtube_id_from_href(href_url):
	    """Return the YouTube video ID contained in a URL.

	    Recognised URL shapes:

	    * ``.../watch?v=<id>`` -- the ``v`` query parameter is used, whatever
	      other parameters (``t``, ``list``, ...) surround it.
	    * ``youtu.be/<id>`` short links.
	    * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>``
	      paths on a YouTube host.

	    Args:
	        href_url: URL string, e.g. the ``href`` of a search result.

	    Returns:
	        The video ID as a string. If none of the shapes above match, the
	        text after the last ``=`` is returned (the whole string when there
	        is no ``=``), which is what this function historically returned
	        for every input.
	    """
	    from urllib.parse import parse_qs, urlparse

	    parsed = urlparse(href_url)

	    # Splitting on '=' and taking the last piece breaks as soon as another
	    # parameter follows ``v`` (``watch?v=ID&t=42s`` would yield ``42s``), so
	    # read the ``v`` parameter explicitly instead.
	    v_values = parse_qs(parsed.query).get("v")
	    if v_values:
	        return v_values[0]

	    host = (parsed.hostname or "").lower()
	    segments = [part for part in parsed.path.split("/") if part]
	    if host == "youtu.be":
	        # Short links carry the ID as the first path segment.
	        if segments:
	            return segments[0]
	    elif host.endswith(("youtube.com", "youtube-nocookie.com")):
	        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
	            return segments[1]

	    # Unknown shape: keep the legacy behaviour so the result is still a str.
	    return href_url.split("=")[-1]
	def get_transcript(video_id):
	    """Fetch a video's transcript and flatten it into one string.

	    Args:
	        video_id: The ID of the YouTube video.

	    Returns:
	        The ``text`` of every transcript segment joined with single
	        spaces, or None if fetching or joining raised any exception (the
	        error is printed, not re-raised).
	    """
	    try:
	        transcript = YouTubeTranscriptApi.get_transcript(video_id)
	        return ' '.join(item['text'] for item in transcript)
	    except Exception as e:
	        # Best effort: report the problem and let the caller skip this video.
	        print(f"Error fetching transcript for {video_id}: {e}")
	        return None

	## START THE SHOW
	# Ask for a topic interactively only when none was preset.
	if CONCEPT == '':
	    CONCEPT = input("What would you like to learn about? ")

	# With no preset videos, search DuckDuckGo (max_results=5) for the topic
	# restricted to youtube.com and derive a video ID from each hit's href.
	if len(VIDEO_IDS) == 0:
	    query = CONCEPT + " site:youtube.com"
	    results = DDGS().text(str(query), region="us-en", max_results=5)
	    for result in results:
	        VIDEO_IDS.append(extract_youtube_id_from_href(result["href"]))

	# Wrap every available transcript in [video_id:...] markers; videos whose
	# transcript came back empty or None are skipped.
	all_videos_str = ""
	for video_id in VIDEO_IDS:
	    vidtxt = get_transcript(video_id)
	    if vidtxt:
	        all_videos_str += "[video_id:" + video_id + "]\n" + str(vidtxt) + "\n[end video_id:" + video_id + "]\n"

	messages = [
	    {"role": "system", "content": "You are a helpful assistant that summarizes multiple video transcripts into a comprehensive set of detailed notes."},
	    {"role": "user", "content": "I'm trying to learn about " + CONCEPT},
	    {"role": "user", "content": "This is all the video transcript text I found online: " + all_videos_str},
	    {"role": "user", "content": "Give me a comprehensive set of notes. Minimum of 15000 characters. Think critically and step by step."},
	]
	print(messages)

	# Send the whole conversation to the configured deployment in one request.
	ai_msg = completion = client.chat.completions.create(
	    model=deployment_name,
	    messages=messages,
	)
	print("--------------------------")
	print(ai_msg.choices[0].message.content)