Skip to content

Instantly share code, notes, and snippets.

@robertknight
Last active August 1, 2024 12:11
Show Gist options
  • Save robertknight/b1216e87d4831fccaf7a03a01421d167 to your computer and use it in GitHub Desktop.
Save robertknight/b1216e87d4831fccaf7a03a01421d167 to your computer and use it in GitHub Desktop.
YouTube captions API test
# Script to fetch and print the transcript for a YouTube video using the
# YouTube Data API v3.
#
# 1. Create a project in the Google API Console
# 2. Enable the YouTube Data API v3 for the new project
# 3. Create credentials for a "Desktop" OAuth client. Download the JSON file
# containing the credentials at the end of the setup process.
# 4. Create a new virtualenv and install dependencies with:
#
# ```
# pip install google-auth-oauthlib requests
# ```
# 5. Edit the `video_id` variable below to reference a video that you are
# the owner of. If you specify a video owned by someone else, the request
# to download captions will return a 403.
# 6. Run the script and authenticate in a browser when prompted. After
# authentication, the transcript should be fetched and printed.
import argparse
import json
import os

import google_auth_oauthlib
import requests
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
# OAuth scope requested both when authorizing interactively and when reloading
# previously saved credentials.
scopes = [
    "https://www.googleapis.com/auth/youtube.force-ssl",
]

# Client JSON file for a "Desktop" OAuth client, as downloaded from the Google
# API Console.
credentials_file = "youtube-client.json"

# File in which the user's OAuth 2 credentials are persisted between runs.
saved_credentials = "saved_oauth_credentials.json"

# ID of the YouTube video whose captions are fetched.
video_id = "-MEhsla5YZc"
# Obtain OAuth 2 user credentials.
#
# When no saved credentials file exists, read the client ID and secret from the
# downloaded client JSON file, run the interactive authorization flow and
# persist the resulting credentials. Otherwise reload the persisted credentials
# so the user is not prompted again.
if not os.path.exists(saved_credentials):
    with open(credentials_file, "r") as fp:
        creds = json.load(fp)

    client_id = creds["installed"]["client_id"]
    client_secret = creds["installed"]["client_secret"]
    credentials = google_auth_oauthlib.get_user_credentials(
        scopes, client_id, client_secret
    )

    # The serialized credentials are secrets, so create the file readable and
    # writable by the current user only rather than with default permissions.
    fd = os.open(saved_credentials, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as fp:
        fp.write(credentials.to_json())
else:
    credentials = Credentials.from_authorized_user_file(
        saved_credentials, scopes=scopes
    )
# Build the authorization headers sent with the API requests below.
#
# Credentials reloaded from disk may carry an access token that expired after
# it was saved, and `apply` only copies the current token into the headers
# without renewing it. Refresh first whenever the credentials are not valid.
if not credentials.valid:
    credentials.refresh(Request())
auth_headers = {}
credentials.apply(auth_headers)
# Get ID of first available caption track for the video.
# See https://developers.google.com/youtube/v3/docs/captions/list.
captions_list_rsp = requests.get(
    "https://www.googleapis.com/youtube/v3/captions",
    params={"part": "id", "videoId": video_id},
    headers=auth_headers,
    # Without a timeout a stalled connection would block the script forever.
    timeout=30,
)
captions_list_rsp.raise_for_status()
captions_list_json = captions_list_rsp.json()

# A video without any caption tracks yields an empty (or missing) item list;
# exit with a readable message instead of an IndexError/KeyError traceback.
caption_tracks = captions_list_json.get("items") or []
if not caption_tracks:
    raise SystemExit(f"No caption tracks found for video {video_id}")
captions_id = caption_tracks[0]["id"]
# Attempt to download the caption track selected above and print it.
# See https://developers.google.com/youtube/v3/docs/captions/download.
captions_rsp = requests.get(
    f"https://www.googleapis.com/youtube/v3/captions/{captions_id}",
    headers=auth_headers,
    # Without a timeout a stalled connection would block the script forever.
    timeout=30,
)
# Any HTTP error status (for example the 403 returned for a video owned by
# someone else) is raised here rather than printing an error body.
captions_rsp.raise_for_status()
transcript = captions_rsp.text
print(transcript)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment