Last active
August 1, 2024 12:11
-
-
Save robertknight/b1216e87d4831fccaf7a03a01421d167 to your computer and use it in GitHub Desktop.
YouTube captions API test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to fetch and print the transcript for a YouTube video using the
# YouTube Data API v3.
#
# 1. Create a project in the Google API Console
# 2. Enable the YouTube Data API v3 for the new project
# 3. Create credentials for a "Desktop" OAuth client. Download the JSON file
#    containing the credentials at the end of the setup process.
# 4. Create a new virtualenv and install dependencies with:
#
#    ```
#    pip install google-auth-oauthlib requests
#    ```
# 5. Edit the `video_id` variable below to reference a video that you are
#    the owner of. If you specify a video owned by someone else, the request
#    to download captions will return a 403.
# 6. Run the script and authenticate in a browser when prompted. After
#    authentication, the transcript should be fetched and printed.
import argparse | |
import json | |
import os | |
from google.oauth2.credentials import Credentials | |
import google_auth_oauthlib | |
import requests | |
# Credentials for a "Desktop" OAuth client, downloaded from the Google API Console.
credentials_file = "youtube-client.json"

# ID of a YouTube video to fetch captions for.
video_id = "-MEhsla5YZc"

# Path where OAuth 2 credentials are persisted.
saved_credentials = "saved_oauth_credentials.json"

# OAuth scope requested for the API calls made below.
scopes = ["https://www.googleapis.com/auth/youtube.force-ssl"]

# Seconds to wait for the API before giving up. `requests` applies no timeout
# by default, so a stalled connection would otherwise block forever.
request_timeout = 30

# Base URL of the captions resource of the YouTube Data API v3.
captions_url = "https://www.googleapis.com/youtube/v3/captions"


def save_credentials(credentials, path=saved_credentials):
    """Persist `credentials` as JSON at `path` so later runs can reuse them."""
    # The file holds OAuth tokens, so create it readable by the owner only
    # rather than with the umask default. The mode only applies when the file
    # is newly created.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as fp:
        fp.write(credentials.to_json())


def authorize_interactively():
    """Run the browser-based OAuth flow and return the new credentials.

    The client ID and secret are read from the "installed" section of
    `credentials_file`.
    """
    with open(credentials_file, "r") as fp:
        client = json.load(fp)["installed"]
    return google_auth_oauthlib.get_user_credentials(
        scopes, client["client_id"], client["client_secret"]
    )


def load_credentials():
    """Return OAuth credentials that are valid for an API call right now.

    Saved credentials are reused when they exist. If their access token is no
    longer valid it is refreshed using the stored refresh token; applying a
    stale token as-is would make every API request fail. When there are no
    saved credentials, or they cannot be refreshed, the interactive browser
    flow is run. Newly obtained or refreshed credentials are written back to
    `saved_credentials`.
    """
    # Local import: only this function needs the refresh transport.
    from google.auth.transport.requests import Request

    if os.path.exists(saved_credentials):
        credentials = Credentials.from_authorized_user_file(
            saved_credentials, scopes=scopes
        )
        if credentials.valid:
            return credentials
        if credentials.refresh_token:
            credentials.refresh(Request())
            save_credentials(credentials)
            return credentials
        # No refresh token available: fall through and re-authorize.

    credentials = authorize_interactively()
    save_credentials(credentials)
    return credentials


def list_caption_tracks(video, auth_headers):
    """Return the decoded JSON listing of caption tracks for `video`.

    See https://developers.google.com/youtube/v3/docs/captions/list.

    Raises `requests.HTTPError` if the API responds with an error status.
    """
    rsp = requests.get(
        captions_url,
        params={"part": "id", "videoId": video},
        headers=auth_headers,
        timeout=request_timeout,
    )
    rsp.raise_for_status()
    return rsp.json()


def first_caption_track_id(listing):
    """Return the ID of the first caption track in a captions list response.

    Raises `ValueError` if the response contains no caption tracks, instead
    of failing with an opaque `IndexError` or `KeyError`.
    """
    items = listing.get("items") or []
    if not items:
        raise ValueError("no caption tracks are available")
    return items[0]["id"]


def download_captions(track_id, auth_headers):
    """Download the caption track `track_id` and return its text.

    See https://developers.google.com/youtube/v3/docs/captions/download.

    Raises `requests.HTTPError` if the API responds with an error status.
    """
    rsp = requests.get(
        f"{captions_url}/{track_id}",
        headers=auth_headers,
        timeout=request_timeout,
    )
    rsp.raise_for_status()
    return rsp.text


def main():
    """Authenticate, then fetch and print the transcript for `video_id`."""
    credentials = load_credentials()
    auth_headers = {}
    credentials.apply(auth_headers)

    # Get ID of first available caption track for the video.
    listing = list_caption_tracks(video_id, auth_headers)
    try:
        track_id = first_caption_track_id(listing)
    except ValueError as err:
        # Exit with a readable message rather than a traceback.
        raise SystemExit(f"Video {video_id!r}: {err}") from None

    # Attempt to download the captions.
    transcript = download_captions(track_id, auth_headers)
    print(transcript)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment