Created
March 7, 2024 19:43
-
-
Save TERNION-1121/fd77fe587acf079d33bc62cd094bd081 to your computer and use it in GitHub Desktop.
YT-Comments-Extractor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from API_KEY import API_KEY | |
from googleapiclient.discovery import build | |
from googleapiclient.errors import HttpError | |
from json import dump | |
def get_video_comments(videoID: str, max_results: int = 20):
    """Collect the text of the top-level comments on a YouTube video.

    Pages through the ``commentThreads().list`` endpoint of the YouTube Data
    API v3 client until a response no longer contains ``nextPageToken``.

    Args:
        videoID: ID of the video whose comment threads are requested.
        max_results: Value sent as ``maxResults`` with every request; must be
            an int in ``[1, 100]``. It sizes a single page, it does not cap
            the total number of comments returned.

    Returns:
        A tuple of the unique ``textDisplay`` strings, in the order the API
        returned them. If an ``HttpError`` is raised while fetching, the
        error is printed and whatever was gathered so far is returned.

    Raises:
        AssertionError: If ``videoID`` is not a ``str`` or ``max_results`` is
            not an ``int`` within ``[1, 100]``.
    """
    # Raised explicitly instead of via ``assert`` so the checks still run
    # under ``python -O``; AssertionError is kept so existing callers that
    # expect that exception type are unaffected.
    if not isinstance(videoID, str):
        raise AssertionError(
            f"videoID should be a string, received {videoID.__class__.__name__}"
        )
    if not (isinstance(max_results, int) and 1 <= max_results <= 100):
        raise AssertionError(
            f"max_results should be an unsigned integer in range [1, 100] inclusive, received {max_results} of type {max_results.__class__.__name__}"
        )

    youtube = build(
        'youtube',
        'v3',
        developerKey=API_KEY
    )

    # Dict keys de-duplicate exactly like a set would, but preserve insertion
    # order, so the returned tuple is stable from one run to the next.
    comments = {}

    # Arguments shared by every page request; ``pageToken`` is added only
    # once the API has handed one back.
    request_args = {
        'part': 'snippet',
        'videoId': videoID,
        'textFormat': 'plainText',
        'maxResults': max_results,
    }

    try:
        while True:
            comments_response = youtube.commentThreads().list(**request_args).execute()

            # extract comment snippets
            for thread in comments_response['items']:
                text = thread['snippet']['topLevelComment']['snippet']['textDisplay']
                comments[text] = None

            # stop when there is no further page
            if 'nextPageToken' not in comments_response:
                break
            request_args['pageToken'] = comments_response['nextPageToken']
    except HttpError as e:
        # Best effort: report the failure and fall through to return the
        # comments collected before it happened.
        print(f"An error occurred: {e}")

    return tuple(comments)
def save_to_json(data, filename='comments.json'):
    """Serialize ``data`` as JSON into the file ``filename``.

    The file is opened for writing as UTF-8 (replacing any existing
    content). Non-ASCII characters are written as-is rather than escaped,
    and the output is indented by four spaces.

    Args:
        data: Object handed to ``json.dump``.
        filename: Path of the file to write.
    """
    dump_options = {'ensure_ascii': False, 'indent': 4}
    with open(filename, mode='w', encoding='utf-8') as out_file:
        dump(data, out_file, **dump_options)
def main():
    """Fetch the comments of one video and write them to a JSON file.

    The video ID and page size are fixed inside this function; the result
    of ``get_video_comments`` is written to ``youtube_comments.json``.
    """
    # NOTE(review): the ID is an empty string in the source -- presumably a
    # placeholder to be filled in before running; confirm.
    video_id = ''
    page_size = 100

    save_to_json(
        get_video_comments(video_id, page_size),
        filename='youtube_comments.json',
    )
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment