# Install the client library first: pip install google-api-python-client
# The API key is obtained from Google Cloud.
import os

# API key used for every YouTube Data API client built in this script.
# It is read from the YOUTUBE_API_KEY environment variable so the real key
# does not have to be stored in the source; the 'xxx' placeholder remains the
# fallback and can still be edited by hand.
def_api_key = os.environ.get('YOUTUBE_API_KEY', 'xxx')
# Import the libraries
from googleapiclient.discovery import build
import pandas as pd
import time
# Run the script
# Example video whose comments are downloaded below.
video_id = 'yQItJNFbkpc'

# YouTube Data API v3 client authenticated with the developer key.
youtube = build(
    'youtube',
    'v3',
    developerKey=def_api_key,
)
def _fetch_thread_replies(service, thread, page_delay=0.0):
    """Return every reply resource belonging to one comment thread.

    The replies embedded in a ``commentThreads`` item are used as-is when
    their number already matches ``snippet.totalReplyCount``. Otherwise the
    embedded list is only a subset, so the full list is paged through
    ``comments.list`` using the top-level comment ID as ``parentId``.

    Args:
        service: YouTube Data API service object.
        thread: One item from a ``commentThreads.list`` response.
        page_delay: Seconds to sleep between reply pages; 0 disables it.

    Returns:
        A list of comment resources (dicts carrying a ``snippet`` key).
    """
    embedded = thread.get('replies', {}).get('comments', [])
    total = thread['snippet'].get('totalReplyCount', len(embedded))
    if total <= len(embedded):
        return embedded

    replies = []
    request = service.comments().list(
        part='snippet',
        parentId=thread['snippet']['topLevelComment']['id'],
        maxResults=100,
        textFormat='plainText',
    )
    while request is not None:
        response = request.execute()
        replies.extend(response.get('items', []))
        request = service.comments().list_next(request, response)
        if request is not None and page_delay > 0:
            time.sleep(page_delay)
    return replies


def get_comments(video_id, *, client=None):
    """Collect every top-level comment and reply posted on a video.

    Pages through ``commentThreads.list`` until ``list_next`` returns no
    further request. Threads whose embedded reply list is incomplete trigger
    additional ``comments.list`` requests so that no reply is missed.

    Args:
        video_id: ID of the video whose comments are collected.
        client: Optional YouTube Data API service object; when omitted the
            module-level ``youtube`` client is used.

    Returns:
        A list of dicts with the keys ``author``, ``comment``, ``likes`` and
        ``is_reply``. Each top-level comment is followed by its replies.
    """
    service = youtube if client is None else client
    comments = []
    request = service.commentThreads().list(
        part='snippet,replies',
        videoId=video_id,
        maxResults=100,
        textFormat='plainText',
    )
    while request is not None:
        response = request.execute()
        # A page without an 'items' key is treated as an empty page.
        for item in response.get('items', []):
            top_comment = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'author': top_comment['authorDisplayName'],
                'comment': top_comment['textDisplay'],
                'likes': top_comment['likeCount'],
                'is_reply': False,
            })
            for reply in _fetch_thread_replies(service, item):
                reply_snippet = reply['snippet']
                comments.append({
                    'author': reply_snippet['authorDisplayName'],
                    'comment': reply_snippet['textDisplay'],
                    'likes': reply_snippet['likeCount'],
                    'is_reply': True,
                })
        request = service.commentThreads().list_next(request, response)
    return comments
# Download every comment of the example video.
all_comments = get_comments(video_id)

# Put the rows into a DataFrame and preview the first five of them.
df = pd.DataFrame(all_comments)
print(df.head(5))

# Optionally keep a copy on disk as CSV (without the index column).
df.to_csv("youtube_comments.csv", index=False)
# Replace with the channel ID (not the username), e.g. the Google Developers
# channel.
channel_id = 'xxx'

# Initialise the API client.
youtube = build(
    'youtube',
    'v3',
    developerKey=def_api_key,
)
def get_uploads_playlist_id(channel_id, *, client=None):
    """Return the ID of a channel's "uploads" playlist.

    Calls ``channels.list`` with ``part='contentDetails'`` and reads
    ``contentDetails.relatedPlaylists.uploads`` from the first returned item.

    Args:
        channel_id: ID of the channel to look up.
        client: Optional YouTube Data API service object; when omitted the
            module-level ``youtube`` client is used.

    Returns:
        The uploads playlist ID, or ``None`` (after printing a notice) when
        the response contains no items.
    """
    service = youtube if client is None else client
    response = service.channels().list(
        part='contentDetails',
        id=channel_id,
    ).execute()

    items = response.get('items')
    if not items:
        print("Channel tidak ditemukan.")
        return None
    return items[0]['contentDetails']['relatedPlaylists']['uploads']
# Look up the uploads playlist of the configured channel and show its ID.
uploads_playlist_id = get_uploads_playlist_id(channel_id)
print(f"Playlist uploads ID: {uploads_playlist_id}")
# Get the playlist ID
# The hidden "uploads" playlist found above is the playlist that gets listed.
def_playlist_id = uploads_playlist_id
# Run the script
# Initialise the API client.
youtube = build(
    'youtube',
    'v3',
    developerKey=def_api_key,
)
def get_videos_from_playlist(playlist_id, *, client=None, page_delay=0.1):
    """Return the IDs of all videos contained in a playlist.

    Requests ``playlistItems.list`` pages of up to 50 entries and follows
    ``nextPageToken`` until the response no longer carries one.

    Args:
        playlist_id: ID of the playlist to enumerate.
        client: Optional YouTube Data API service object; when omitted the
            module-level ``youtube`` client is used.
        page_delay: Seconds to sleep before requesting the next page.

    Returns:
        A list of video ID strings in the order the API returned them.
    """
    service = youtube if client is None else client
    video_ids = []
    next_page_token = None
    while True:
        res = service.playlistItems().list(
            part='contentDetails',
            playlistId=playlist_id,
            maxResults=50,
            pageToken=next_page_token,
        ).execute()
        # A response without 'items' contributes nothing instead of raising.
        video_ids.extend(
            item['contentDetails']['videoId'] for item in res.get('items', [])
        )
        next_page_token = res.get('nextPageToken')
        if not next_page_token:
            return video_ids
        # Only pause when another page is about to be requested.
        time.sleep(page_delay)
def get_video_details(video_ids, *, client=None):
    """Fetch title, publish date and basic statistics for a list of videos.

    The IDs are sent to ``videos.list`` in comma-joined batches of at most 50.

    Args:
        video_ids: List of video ID strings.
        client: Optional YouTube Data API service object; when omitted the
            module-level ``youtube`` client is used.

    Returns:
        A list of dicts with the keys ``videoId``, ``title``,
        ``publishedAt``, ``viewCount`` and ``commentCount``. The two counts
        are converted to ``int`` and default to 0 when the API omits them.
    """
    service = youtube if client is None else client
    video_data = []
    for start in range(0, len(video_ids), 50):
        batch_ids = video_ids[start:start + 50]
        res = service.videos().list(
            part='snippet,statistics',
            id=','.join(batch_ids),
        ).execute()
        for item in res.get('items', []):
            snippet = item['snippet']
            stats = item.get('statistics', {})
            video_data.append({
                'videoId': item['id'],
                'title': snippet['title'],
                'publishedAt': snippet['publishedAt'],
                'viewCount': int(stats.get('viewCount', 0)),
                'commentCount': int(stats.get('commentCount', 0)),
            })
    return video_data
# Collect every video ID from the playlist, then the details of those videos.
video_ids = get_videos_from_playlist(def_playlist_id)
video_details = get_video_details(video_ids)

# Put the details into a DataFrame and preview the first five rows.
df = pd.DataFrame(video_details)
print(df.head(5))

# Optionally keep a copy on disk as CSV (without the index column).
df.to_csv("video_playlist.csv", index=False)
youtube = build(
    'youtube',
    'v3',
    developerKey=def_api_key,
)

# Display name of the channel owner; comments by this author are filtered out
# by get_comments.
def_id_pemilik = "@dirumahrafif"

# How many of the most-commented videos to process.
def_jumlah_video = 80

# Load the video list saved as video_playlist.csv.
df_videos = pd.read_csv('video_playlist.csv')

# Sort by commentCount in descending order and keep only the top videos.
df_videos_sorted = (
    df_videos
    .sort_values(by='commentCount', ascending=False)
    .head(def_jumlah_video)
)

# Map each selected videoId to its title.
video_title_map = dict(
    zip(df_videos_sorted['videoId'], df_videos_sorted['title'])
)

# The selected video IDs, in sorted order.
video_ids = df_videos_sorted['videoId'].tolist()
def _fetch_thread_replies(service, thread, page_delay=0.0):
    """Return every reply resource belonging to one comment thread.

    The replies embedded in a ``commentThreads`` item are used as-is when
    their number already matches ``snippet.totalReplyCount``. Otherwise the
    embedded list is only a subset, so the full list is paged through
    ``comments.list`` using the top-level comment ID as ``parentId``.

    Args:
        service: YouTube Data API service object.
        thread: One item from a ``commentThreads.list`` response.
        page_delay: Seconds to sleep between reply pages; 0 disables it.

    Returns:
        A list of comment resources (dicts carrying a ``snippet`` key).
    """
    embedded = thread.get('replies', {}).get('comments', [])
    total = thread['snippet'].get('totalReplyCount', len(embedded))
    if total <= len(embedded):
        return embedded

    replies = []
    request = service.comments().list(
        part='snippet',
        parentId=thread['snippet']['topLevelComment']['id'],
        maxResults=100,
        textFormat='plainText',
    )
    while request is not None:
        response = request.execute()
        replies.extend(response.get('items', []))
        request = service.comments().list_next(request, response)
        if request is not None and page_delay > 0:
            time.sleep(page_delay)
    return replies


def get_comments(video_id, *, client=None, owner_name=None, page_delay=0.1):
    """Collect the comments and replies on a video, skipping the owner's own.

    Pages through ``commentThreads.list`` until ``list_next`` returns no
    further request. Threads whose embedded reply list is incomplete trigger
    additional ``comments.list`` requests so that no reply is missed.

    A comment or reply is dropped when its ``authorDisplayName`` equals the
    owner name. Replies written by other authors are kept even when the
    top-level comment of their thread was written by the owner.

    Args:
        video_id: ID of the video whose comments are collected.
        client: Optional YouTube Data API service object; when omitted the
            module-level ``youtube`` client is used.
        owner_name: Author display name to exclude. ``None`` falls back to
            the module-level ``def_id_pemilik``; an empty string disables
            the filter.
        page_delay: Seconds to sleep between consecutive page requests.

    Returns:
        A list of dicts with the keys ``author``, ``comment``, ``likeCount``
        and ``isReply``.
    """
    service = youtube if client is None else client
    owner = def_id_pemilik if owner_name is None else owner_name
    comments = []
    request = service.commentThreads().list(
        part='snippet,replies',
        videoId=video_id,
        maxResults=100,
        textFormat='plainText',
    )
    while request is not None:
        response = request.execute()
        # A page without an 'items' key is treated as an empty page.
        for item in response.get('items', []):
            entries = [(item['snippet']['topLevelComment']['snippet'], False)]
            entries.extend(
                (reply['snippet'], True)
                for reply in _fetch_thread_replies(service, item, page_delay)
            )
            for snippet, is_reply in entries:
                # Filter each entry on its own author so that an owner-authored
                # top-level comment does not discard other people's replies.
                if owner and snippet['authorDisplayName'] == owner:
                    continue
                comments.append({
                    'author': snippet['authorDisplayName'],
                    'comment': snippet['textDisplay'],
                    'likeCount': snippet['likeCount'],
                    'isReply': is_reply,
                })
        request = service.commentThreads().list_next(request, response)
        # Only pause when another page is about to be requested.
        if request is not None and page_delay > 0:
            time.sleep(page_delay)
    return comments
def get_video_title(video_id):
    """Look up the title stored for *video_id* in ``video_title_map``.

    Returns the placeholder ``'Unknown Title'`` when the ID is not a key of
    the map.
    """
    try:
        return video_title_map[video_id]
    except KeyError:
        return 'Unknown Title'
# One row per comment, accumulated across all selected videos.
all_comment_data = []
batch_size = 10

for i in range(0, len(video_ids), batch_size):
    batch = video_ids[i:i + batch_size]
    print(f"Processing batch videos {i+1} to {i+len(batch)}")
    for video_id in batch:
        print(f" Mengambil komentar video {video_id}")
        title = get_video_title(video_id)
        comments = get_comments(video_id)
        # Tag every comment with the video it belongs to.
        all_comment_data.extend(
            {
                'videoId': video_id,
                'title': title,
                'author': c['author'],
                'comment': c['comment'],
                'likeCount': c['likeCount'],
                'isReply': c['isReply'],
            }
            for c in comments
        )
    # Pause after each batch of videos before starting the next one.
    time.sleep(10)

# Build the final table, preview it and write it to CSV without the index.
df_comments = pd.DataFrame(all_comment_data)
print(df_comments.head(5))
df_comments.to_csv('video_comments.csv', index=False)