Skip to content

Instantly share code, notes, and snippets.

@galaris
Created March 9, 2025 21:27
Show Gist options
  • Save galaris/f1c5b45501409d29314c2a84b0d26332 to your computer and use it in GitHub Desktop.
Save galaris/f1c5b45501409d29314c2a84b0d26332 to your computer and use it in GitHub Desktop.
Python script that extracts a YouTube video's chapters
import requests
import json
from typing import Optional
class YoutubeChaptersGetter:
    """Fetch a YouTube watch page and extract the video's chapter list.

    The chapters are read from the ``ytInitialData`` JSON object that is
    embedded in a ``<script>`` tag of the watch page HTML.
    """

    def __init__(self, youtube_url: str = "https://youtube.com", timeout: float = 10.0):
        """Store the base URL and the HTTP timeout (in seconds) for requests."""
        self.youtube = youtube_url
        self.timeout = timeout

    def _get_video(self, video_id: str) -> Optional[str]:
        """Download the watch page HTML for *video_id*.

        Returns the response body, or ``None`` (after printing the error) when
        the request fails or the server answers with an error status.
        """
        try:
            response = requests.get(
                f"{self.youtube}/watch",
                # Let requests URL-encode the id instead of splicing it in raw.
                params={"v": video_id},
                # Without a timeout a stalled connection would block forever.
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.text
        except requests.RequestException as error:
            print(f"getVideo err: {error}")
            return None

    def _get_script(self, html: str) -> Optional[str]:
        """Cut the ``ytInitialData`` JSON text out of the page HTML.

        Returns the text between ``var ytInitialData = `` and the next
        ``</script>``, stripped of surrounding whitespace and of one trailing
        ``;``. Returns ``None`` when either delimiter is missing.
        """
        try:
            _, marker, remainder = html.partition("var ytInitialData = ")
            if not marker:
                return None
            payload, closing_tag, _ = remainder.partition("</script>")
            if not closing_tag:
                return None
            payload = payload.strip()
            return payload[:-1] if payload.endswith(";") else payload
        except Exception as error:
            # Best-effort: e.g. a non-string argument ends up here.
            print(f"getScript err: {error}")
            return None

    def _millis_to_time(self, millis: int) -> str:
        """Convert milliseconds to a ``minutes:seconds`` string like '5:22'.

        Hours are not split out, so 3,922,000 ms is rendered as '65:22'.
        """
        minutes, seconds = divmod(millis // 1000, 60)
        return f"{minutes}:{seconds:02d}"

    def _extract_chapters(self, yt_initial_data: dict) -> list:
        """Build the ``[{"title": ..., "time": ...}]`` list from parsed data.

        Raises ``KeyError`` when an expected key is missing or when no
        ``markersMap`` entry carries a ``chapters`` list.
        """
        markers_map = (
            yt_initial_data["playerOverlays"]
            ["playerOverlayRenderer"]
            ["decoratedPlayerBarRenderer"]
            ["decoratedPlayerBarRenderer"]
            ["playerBar"]
            ["multiMarkersPlayerBarRenderer"]
            ["markersMap"]
        )
        if not markers_map:
            print("No markersMap data found")
            return []

        # The chapter list is not guaranteed to be the first marker entry
        # (other marker kinds may precede it), so scan for it instead of
        # hard-coding index 0.
        chapters_data = next(
            (
                entry["value"]["chapters"]
                for entry in markers_map
                if "chapters" in entry.get("value", {})
            ),
            None,
        )
        if chapters_data is None:
            raise KeyError("chapters")

        chapters = []
        for chapter_item in chapters_data:
            renderer = chapter_item["chapterRenderer"]
            chapters.append({
                "title": renderer["title"]["simpleText"],
                "time": self._millis_to_time(renderer["timeRangeStartMillis"]),
            })
        return chapters

    def get_chapters(self, video_id: str) -> str:
        """
        Get chapters for a YouTube video ID and return as JSON string.

        Returns a JSON array of ``{"title", "time"}`` objects on success and
        the empty JSON array ``"[]"`` on any failure (the reason is printed).
        """
        try:
            html = self._get_video(video_id)
            if not html:
                print("No HTML content retrieved")
                return "[]"
            script_data = self._get_script(html)
            if not script_data:
                print("No script data extracted")
                return "[]"
            chapters = self._extract_chapters(json.loads(script_data))
            return json.dumps(chapters)
        except KeyError as key_err:
            print(f"Key error in path: Missing key {key_err}")
            return "[]"
        except json.JSONDecodeError as json_err:
            print(f"JSON parsing error: {json_err}")
            return "[]"
        except Exception as error:
            print(f"Unexpected error in get_chapters: {error}")
            return "[]"
# Module-level instance shared by every caller of this module
youtube_chapters_getter: YoutubeChaptersGetter = YoutubeChaptersGetter()
def get_youtube_chapters(video_id: str) -> str:
    """Return the chapters of a YouTube video as a JSON string.

    Delegates to the module-level ``youtube_chapters_getter`` instance and
    hands back whatever its ``get_chapters`` method produces for *video_id*
    (the chapter list, or an empty JSON array).
    """
    chapters_json = youtube_chapters_getter.get_chapters(video_id)
    return chapters_json
# Demo: fetch and print the chapters when this file is executed as a script
if __name__ == "__main__":
    print(get_youtube_chapters("YOUTUBE_VIDEO_ID_COMES_HERE"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment