Last active
September 29, 2020 06:21
-
-
Save martjanz/f6092184e6c0cabf2b1c7a8bd8cf733e to your computer and use it in GitHub Desktop.
YouTube Channel Downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# YouTube Channel Downloader | |
# | |
# Download all videos from all user/channel playlists | |
# | |
# TODO: check pagination. Tested with up to 10 playlists and up to 50 videos each. | |
import json | |
import re | |
import time | |
import traceback | |
from urllib.request import urlopen | |
# External dependencies | |
# - jsonpath_rw | |
# - pytube3 | |
# - requests | |
from jsonpath_rw import parse | |
from pytube import Playlist | |
from pytube import YouTube | |
import requests | |
# -- Parameters --
# Kind of media fetched for every video; the main loop only recognises
# 'video' and 'audio' and prints an error message for anything else.
media_type = 'video'  # or 'audio'
# Channel playlists to download
# The name is inserted into https://www.youtube.com/c/<name>/playlists.
channel_name = 'ArchivoHistóricoRTA'
def get_channel_playlists(yt_username):
    """Return the playlists found on a YouTube channel's "playlists" page.

    Fetches ``https://www.youtube.com/c/<yt_username>/playlists``, extracts
    the JSON assigned to ``window["ytInitialData"]`` in the page source and
    collects every ``gridPlaylistRenderer`` entry found inside a
    ``gridRenderer.items`` node.

    Only the data embedded in that single HTML response is inspected; no
    further pages are requested.

    Args:
        yt_username: Channel name as used in the ``/c/<name>`` URL.

    Returns:
        A list of dicts, one per playlist, with the keys ``'id'`` (the
        renderer's ``playlistId``) and ``'title'`` (text of the first
        title run).

    Raises:
        ValueError: If the page source does not contain the
            ``window["ytInitialData"]`` assignment.
    """
    channel_playlists_url = 'https://www.youtube.com/c/{}/playlists'.format(yt_username)
    html = requests.get(channel_playlists_url).text

    # Get json metadata from HTML
    _js_regex = re.compile(r"window\[\"ytInitialData\"] = ([^\n]+)")
    found = _js_regex.search(html)
    if found is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'Could not find "ytInitialData" in {}'.format(channel_playlists_url)
        )

    # Drop the last character of the matched line before decoding
    # (presumably the ';' that terminates the JavaScript statement).
    raw_json = found.group(1)[0:-1]
    json_decoded = json.loads(raw_json)

    jsonpath = '$..gridRenderer.items..gridPlaylistRenderer'
    jsonpath_parsed = parse(jsonpath)

    return [
        {
            'id': match.value['playlistId'],
            'title': match.value['title']['runs'][0]['text'],
        }
        for match in jsonpath_parsed.find(json_decoded)
    ]
# -- Code -- | |
def download_audio(url, path='.'):
    """Download an audio-only mp4 stream of a YouTube video.

    The first stream matching ``only_audio=True`` and
    ``file_extension='mp4'`` is downloaded. This is best-effort: any
    exception raised while resolving or downloading the stream is printed
    as a traceback and then suppressed, so the caller can carry on.

    Args:
        url: URL of the video to download.
        path: Destination passed to the stream's ``download`` call.
            Defaults to the current directory.

    Returns:
        None.
    """
    print('Downloading audio from {}...'.format(url))

    try:
        # Use the ``url`` argument (not a global) and honour ``path``.
        YouTube(url) \
            .streams \
            .filter(only_audio=True, file_extension='mp4') \
            .first() \
            .download(path)
    except Exception:
        # Deliberately broad: one failing video must not abort the caller.
        traceback.print_exc()
def download_video(url, path='.'):
    """Download a progressive mp4 stream of a YouTube video.

    Progressive mp4 streams are ordered by resolution, descending, and the
    first one is downloaded. Any exception raised along the way is printed
    as a traceback and then suppressed.

    Args:
        url: URL of the video to download.
        path: Destination passed to the stream's ``download`` call.
            Defaults to the current directory.

    Returns:
        None.
    """
    print('Downloading video from {}...'.format(url))

    try:
        candidates = YouTube(url).streams.filter(
            progressive=True,
            file_extension='mp4',
        )
        chosen = candidates.order_by('resolution').desc().first()
        chosen.download(path)
    except Exception:
        # Best-effort: report the failure and let the caller continue.
        traceback.print_exc()
# Walk every playlist of the configured channel and download each video.
playlists = get_channel_playlists(channel_name)

for playlist in playlists:
    yt_playlist = Playlist('https://www.youtube.com/playlist?list={}'.format(playlist['id']))
    # The playlist title is used as the download destination path.
    dest_path = playlist['title']

    # Read the URL list once and reuse it for both the count and the loop.
    video_urls = yt_playlist.video_urls
    print('Downloading {} {}s from playlist...'.format(len(video_urls), media_type))

    for video_url in video_urls:
        if media_type == 'video':
            download_video(video_url, dest_path)  # Download video (with audio)
        elif media_type == 'audio':
            download_audio(video_url, dest_path)  # Download audio only
        else:
            print('Media type not supported. Check "media_type" variable.')

        # Throttle to avoid YouTube restriction (Too many requests)
        time.sleep(3)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
jsonpath_rw | |
pytube3 | |
requests |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment