Created
March 9, 2025 21:27
-
-
Save galaris/f1c5b45501409d29314c2a84b0d26332 to your computer and use it in GitHub Desktop.
Python to extract a YouTube video's chapters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
from typing import Optional | |
class YoutubeChaptersGetter:
    """Download a YouTube watch page and extract the video's chapters.

    The chapters are read from the ``ytInitialData`` JSON object that the
    watch page embeds in a ``<script>`` tag.
    """

    def __init__(self, youtube_url: str = "https://youtube.com", timeout: float = 10.0):
        """Store the base URL and the per-request timeout.

        Args:
            youtube_url: Base URL the ``/watch`` path is appended to.
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        self.youtube = youtube_url
        self.timeout = timeout

    def _get_video(self, video_id: str) -> Optional[str]:
        """Return the watch-page HTML for *video_id*, or ``None`` on failure."""
        try:
            response = requests.get(
                f"{self.youtube.rstrip('/')}/watch",
                # Let requests URL-encode the id instead of splicing it in raw.
                params={"v": video_id},
                # Without a timeout a stalled connection would block forever.
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.text
        except requests.RequestException as error:
            print(f"getVideo err: {error}")
            return None

    def _get_script(self, html: str) -> Optional[str]:
        """Return the raw ``ytInitialData`` JSON text found in *html*.

        The text between ``var ytInitialData = `` and the next ``</script>``
        is returned with surrounding whitespace and one trailing ``;``
        removed. Returns ``None`` when either delimiter is missing or *html*
        is not a string.
        """
        marker = "var ytInitialData = "
        try:
            marker_pos = html.find(marker)
            if marker_pos == -1:
                return None
            payload_start = marker_pos + len(marker)
            payload_end = html.find("</script>", payload_start)
        except (AttributeError, TypeError) as error:
            print(f"getScript err: {error}")
            return None
        if payload_end == -1:
            return None
        payload = html[payload_start:payload_end].strip()
        return payload[:-1] if payload.endswith(";") else payload

    def _millis_to_time(self, millis: int) -> str:
        """Convert milliseconds to a time string like '5:22'."""
        minutes, seconds = divmod(int(millis) // 1000, 60)
        return f"{minutes}:{seconds:02d}"

    def _extract_chapters(self, yt_initial_data: dict) -> list:
        """Build the list of ``{"title", "time"}`` dicts from parsed page data.

        Returns an empty list when ``markersMap`` is empty or none of its
        entries holds a ``chapters`` list.

        Raises:
            KeyError: A key on the expected path is missing.
        """
        overlay_renderer = yt_initial_data["playerOverlays"]["playerOverlayRenderer"]
        # The payload nests "decoratedPlayerBarRenderer" inside itself.
        decorated_bar = overlay_renderer["decoratedPlayerBarRenderer"]["decoratedPlayerBarRenderer"]
        markers_map = decorated_bar["playerBar"]["multiMarkersPlayerBarRenderer"]["markersMap"]
        if not markers_map:
            print("No markersMap data found")
            return []

        # Scan every entry rather than only the first one.
        # NOTE(review): assumes markersMap can hold entries without a
        # "chapters" list ahead of the chapter entry - confirm against
        # real payloads.
        chapters_data = next(
            (
                entry["value"]["chapters"]
                for entry in markers_map
                if "chapters" in (entry.get("value") or {})
            ),
            None,
        )
        if chapters_data is None:
            print("No chapters entry found in markersMap")
            return []

        chapters = []
        for chapter_item in chapters_data:
            renderer = chapter_item["chapterRenderer"]
            chapters.append({
                "title": renderer["title"]["simpleText"],
                "time": self._millis_to_time(renderer["timeRangeStartMillis"]),
            })
        return chapters

    def get_chapters(self, video_id: str) -> str:
        """
        Get chapters for a YouTube video ID and return as JSON string.

        Returns a JSON array of objects with "title" and "time" on success,
        and the empty JSON array "[]" on any failure (download error, missing
        script data, unparsable JSON, unexpected payload shape).
        """
        try:
            html = self._get_video(video_id)
            if not html:
                print("No HTML content retrieved")
                return "[]"

            script_data = self._get_script(html)
            if not script_data:
                print("No script data extracted")
                return "[]"

            yt_initial_data = json.loads(script_data)
            try:
                chapters = self._extract_chapters(yt_initial_data)
            except KeyError as key_err:
                print(f"Key error in path: Missing key {key_err}")
                return "[]"
            return json.dumps(chapters)
        except json.JSONDecodeError as json_err:
            print(f"JSON parsing error: {json_err}")
            return "[]"
        except Exception as error:
            # Boundary handler: this method promises "[]" rather than raising.
            print(f"Unexpected error in get_chapters: {error}")
            return "[]"
# Shared module-level instance, created once at import time with the default
# constructor arguments, so other applications can reuse a single getter.
youtube_chapters_getter: YoutubeChaptersGetter = YoutubeChaptersGetter()
def get_youtube_chapters(video_id: str) -> str:
    """Return the chapters of the video with ID *video_id* as a JSON string.

    Public convenience wrapper: the call is forwarded to the module-level
    ``youtube_chapters_getter`` instance and its result (the chapters, or an
    empty JSON array) is passed through unchanged.
    """
    chapters_json = youtube_chapters_getter.get_chapters(video_id)
    return chapters_json
# Demo entry point: runs only when the file is executed as a script.
if __name__ == "__main__":
    print(get_youtube_chapters("YOUTUBE_VIDEO_ID_COMES_HERE"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment