Created
February 5, 2024 20:09
-
-
Save softyoda/939a3ba825894d157726a248de536f2b to your computer and use it in GitHub Desktop.
Scrape the SRT subtitles, description, and title of every video on a YouTube channel.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from youtube_transcript_api.formatters import SRTFormatter | |
import json | |
from datetime import timedelta | |
_REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the whole run
_OUTPUT_PATH = 'channel_videos.json'


def _fetch_video_metadata(base_url, api_key, video_id):
    """Return the metadata fields for one video, or an empty dict if the API returns no item."""
    response = requests.get(
        f"{base_url}/videos",
        params={
            'id': video_id,
            'key': api_key,
            'part': 'snippet,statistics,contentDetails',
        },
        timeout=_REQUEST_TIMEOUT,
    )
    # An error payload has no 'items' key, so use .get() instead of indexing.
    items = response.json().get('items')
    if not items:
        return {}

    item = items[0]
    snippet = item['snippet']
    return {
        'video_title': snippet['title'],
        'video_description': snippet['description'],
        # .get() so a missing view count does not abort the whole run.
        'video_view_count': item.get('statistics', {}).get('viewCount'),
        # Duration is kept in the ISO 8601 form returned by the API.
        'video_length': item['contentDetails']['duration'],
        'release_date': snippet['publishedAt'],
    }


def _fetch_french_subtitles(video_id):
    """Return the French subtitles as plain text and SRT; best effort, errors are only printed."""
    subtitles = {}
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = transcript_list.find_transcript(['fr'])
        # Fetch once and reuse the result for both output formats.
        entries = transcript.fetch()
        # Full text without timestamps.
        subtitles['video_subtitle_text'] = ' '.join([i['text'] for i in entries])
        # Subtitles in SRT format.
        subtitles['video_subtitle_srt'] = SRTFormatter().format_transcript(entries)
    except Exception as e:
        # Many videos have no French transcript; keep going without subtitles.
        print(f"Error while fetching subtitles for video ID {video_id}: {e}")
    return subtitles


def download_channel_videos(api_key, channel_id):
    """Download metadata and French subtitles for the videos of a YouTube channel.

    Results are merged into 'channel_videos.json' in the current directory:
    videos whose ID is already present in that file are skipped, new ones are
    appended. The file is rewritten even when the run is interrupted by an
    exception, so already-downloaded entries are not lost.

    Args:
        api_key: YouTube Data API v3 key.
        channel_id: ID of the channel whose videos are listed.
    """
    base_url = "https://www.googleapis.com/youtube/v3"
    search_params = {
        'key': api_key,
        'channelId': channel_id,
        'part': 'snippet,id',
        'order': 'date',
        'maxResults': 50,
    }

    # Load the existing JSON if there is one.
    try:
        with open(_OUTPUT_PATH, 'r', encoding='utf-8') as existing_file:
            video_details_list = json.load(existing_file)
    except FileNotFoundError:
        video_details_list = []

    # Set of saved IDs: constant-time membership test instead of scanning the list.
    known_ids = {v['video_id'] for v in video_details_list}
    total_videos = len(video_details_list)  # number of videos already saved
    page_token = None

    try:
        while True:
            # Build the query from the base parameters on every page so that
            # exactly one pageToken is sent (appending to the previous URL
            # accumulated several conflicting tokens).
            params = dict(search_params)
            if page_token:
                params['pageToken'] = page_token
            response = requests.get(
                f"{base_url}/search", params=params, timeout=_REQUEST_TIMEOUT
            )
            videos = response.json()

            if 'error' in videos:
                # Report the API error instead of silently ending the listing.
                print(f"YouTube API error: {videos['error']}")
                break

            total_results = videos.get('pageInfo', {}).get('totalResults', '?')

            for video in videos.get('items', []):
                if video['id']['kind'] != "youtube#video":
                    continue
                video_id = video['id']['videoId']

                # Skip videos that are already saved.
                if video_id in known_ids:
                    print(f"Video ID: {video_id} already exists in JSON.")
                    print(f"({total_videos}/{total_results})")
                    continue

                video_data = {
                    'video_id': video_id,
                    'video_full_url': f"https://www.youtube.com/watch?v={video_id}"
                }
                video_data.update(_fetch_video_metadata(base_url, api_key, video_id))
                video_data.update(_fetch_french_subtitles(video_id))

                video_details_list.append(video_data)
                known_ids.add(video_id)
                total_videos += 1
                print(f"Downloaded details for video ID: {video_id} ({total_videos}/{total_results})")

            page_token = videos.get('nextPageToken')
            if not page_token:
                break
    finally:
        # Persist whatever was collected, even if the loop raised.
        with open(_OUTPUT_PATH, 'w', encoding='utf-8') as f:
            json.dump(video_details_list, f, ensure_ascii=False, indent=4)

    print("All video details downloaded and saved to 'channel_videos.json'.")
# Replace with your own API key.
api_key = 'API_TOKEN'
# Replace with the YouTube channel ID (if the channel uses an alias, the ID
# can be found in the source code of the channel's YouTube page).
channel_id = 'CHANNEL_ID'

# Run the download for the configured channel.
download_channel_videos(api_key=api_key, channel_id=channel_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment