martjanz · September 29, 2020 06:21
diff --git a/channel-downloader.py b/channel-downloader.py
 # YouTube Channel Downloader
 # 
 # Download all videos from all user/channel playlists
 #
 # TODO: check pagination. Tested with up to 10 playlists and up to 50 videos each.
 import json
 import re
 import time
 import traceback
 from urllib.request import urlopen

 # External dependencies
 # - jsonpath_rw
 # - pytube3
 # - requests

 from jsonpath_rw import parse
 from pytube import Playlist
 from pytube import YouTube
 import requests

 # -- Parameters --
 # Selects which downloader the main loop uses: 'video' calls download_video,
 # 'audio' calls download_audio; any other value is reported as unsupported.
 media_type = 'video' # or 'audio'

 # Channel whose playlists are downloaded; the name is inserted into the
 # https://www.youtube.com/c/<name>/playlists URL.
 channel_name = 'ArchivoHistóricoRTA'

 def get_channel_playlists(yt_username):
     """Return the playlists listed on a channel's "playlists" page.

     Fetches ``https://www.youtube.com/c/<yt_username>/playlists``, extracts
     the ``ytInitialData`` JSON blob embedded in the HTML and collects every
     ``gridPlaylistRenderer`` node found below a ``gridRenderer.items`` node.

     Args:
         yt_username: Channel name as it appears after ``/c/`` in the URL.

     Returns:
         A list of dicts, one per playlist found, each with the keys
         ``'id'`` (the ``playlistId`` value) and ``'title'`` (the text of
         the first title run).

     Raises:
         requests.RequestException: On network failure, timeout or an HTTP
             error status.
         ValueError: If the page does not contain the ``ytInitialData``
             assignment or the embedded JSON cannot be decoded.
     """
     channel_playlists_url = 'https://www.youtube.com/c/{}/playlists'.format(yt_username)

     # A timeout keeps a stalled connection from hanging the script forever,
     # and an error status is reported as such instead of as a parse failure.
     response = requests.get(channel_playlists_url, timeout=30)
     response.raise_for_status()

     # Get json metadata from HTML
     match = re.search(r"window\[\"ytInitialData\"] = ([^\n]+)", response.text)
     if match is None:
         raise ValueError(
             'ytInitialData not found in {}'.format(channel_playlists_url))

     # raw_decode parses the leading JSON value and ignores whatever follows
     # it on the line (presumably the statement's trailing ';'), so no blind
     # slicing of the last character is needed.
     json_decoded, _ = json.JSONDecoder().raw_decode(match.group(1))

     jsonpath_parsed = parse('$..gridRenderer.items..gridPlaylistRenderer')

     return [
         {
             'id': found.value['playlistId'],
             'title': found.value['title']['runs'][0]['text'],
         }
         for found in jsonpath_parsed.find(json_decoded)
     ]

 # -- Code --
 def download_audio(url, path='.'):
     """Download the first audio-only MP4 stream of a YouTube video.

     Best-effort: any exception raised while resolving or downloading the
     stream is printed as a traceback and swallowed, so a batch run can
     continue with the next video.

     Args:
         url: URL of the video to download.
         path: Output directory handed to the stream's ``download`` call.
             Defaults to the current directory.
     """
     print('Downloading audio from {}...'.format(url))

     try:
         # Use the ``url`` argument (not a caller's global) and honour ``path``.
         stream = YouTube(url) \
             .streams \
             .filter(only_audio=True, file_extension='mp4') \
             .first()
         if stream is None:
             print('No audio-only mp4 stream found for {}.'.format(url))
             return
         stream.download(path)
     except Exception:
         traceback.print_exc()

 def download_video(url, path='.'):
     """Download a progressive MP4 stream of a YouTube video.

     The progressive MP4 streams are ordered by resolution, descending, and
     the first one is downloaded into ``path``. Any exception is printed as
     a traceback and swallowed.

     Args:
         url: URL of the video to download.
         path: Output directory handed to the stream's ``download`` call.
     """
     print('Downloading video from {}...'.format(url))

     try:
         candidates = YouTube(url).streams.filter(
             progressive=True, file_extension='mp4')
         ranked = candidates.order_by('resolution').desc()
         best = ranked.first()
         best.download(path)
     except Exception:
         traceback.print_exc()

 # Main script: download every video of every playlist of the channel.
 playlists = get_channel_playlists(channel_name)

 # The media type never changes while running, so pick the routine once
 # instead of re-checking (and sleeping) for every single video.
 downloaders = {
     'video': download_video,  # Download video (with audio)
     'audio': download_audio,  # Download audio
 }
 download = downloaders.get(media_type)

 if download is None:
     print('Media type not supported. Check "media_type" variable.')
 else:
     for playlist in playlists:
         yt_playlist = Playlist('https://www.youtube.com/playlist?list={}'.format(playlist['id']))
         # Each playlist is saved into a directory named after its title.
         dest_path = playlist['title']

         # Read the URL list once so counting and iterating see the same data.
         video_urls = list(yt_playlist.video_urls)

         print('Downloading {} {}s from playlist...'.format(len(video_urls), media_type))

         for video_url in video_urls:
             download(video_url, dest_path)

             # Throttle to avoid YouTube restriction (Too many requests)
             time.sleep(3)
diff --git a/requirements.txt b/requirements.txt
 jsonpath_rw
 pytube3
 requests
	# YouTube Channel Downloader
	#
	# Download all videos from all user/channel playlists
	#
	# TODO: check pagination. Tested with up to 10 playlists and up to 50 videos each.
	import json
	import re
	import time
	import traceback
	from urllib.request import urlopen

	# External dependencies
	# - jsonpath_rw
	# - pytube3
	# - requests

	from jsonpath_rw import parse
	from pytube import Playlist
	from pytube import YouTube
	import requests

	# -- Parameters --
	# Selects which downloader the main loop uses: 'video' calls download_video,
	# 'audio' calls download_audio; any other value is reported as unsupported.
	media_type = 'video' # or 'audio'

	# Channel whose playlists are downloaded; the name is inserted into the
	# https://www.youtube.com/c/<name>/playlists URL.
	channel_name = 'ArchivoHistóricoRTA'

	def get_channel_playlists(yt_username):
	    """Return the playlists listed on a channel's "playlists" page.

	    Fetches ``https://www.youtube.com/c/<yt_username>/playlists``, extracts
	    the ``ytInitialData`` JSON blob embedded in the HTML and collects every
	    ``gridPlaylistRenderer`` node found below a ``gridRenderer.items`` node.

	    Args:
	        yt_username: Channel name as it appears after ``/c/`` in the URL.

	    Returns:
	        A list of dicts, one per playlist found, each with the keys
	        ``'id'`` (the ``playlistId`` value) and ``'title'`` (the text of
	        the first title run).

	    Raises:
	        requests.RequestException: On network failure, timeout or an HTTP
	            error status.
	        ValueError: If the page does not contain the ``ytInitialData``
	            assignment or the embedded JSON cannot be decoded.
	    """
	    channel_playlists_url = 'https://www.youtube.com/c/{}/playlists'.format(yt_username)

	    # A timeout keeps a stalled connection from hanging the script forever,
	    # and an error status is reported as such instead of as a parse failure.
	    response = requests.get(channel_playlists_url, timeout=30)
	    response.raise_for_status()

	    # Get json metadata from HTML
	    match = re.search(r"window\[\"ytInitialData\"] = ([^\n]+)", response.text)
	    if match is None:
	        raise ValueError(
	            'ytInitialData not found in {}'.format(channel_playlists_url))

	    # raw_decode parses the leading JSON value and ignores whatever follows
	    # it on the line (presumably the statement's trailing ';'), so no blind
	    # slicing of the last character is needed.
	    json_decoded, _ = json.JSONDecoder().raw_decode(match.group(1))

	    jsonpath_parsed = parse('$..gridRenderer.items..gridPlaylistRenderer')

	    return [
	        {
	            'id': found.value['playlistId'],
	            'title': found.value['title']['runs'][0]['text'],
	        }
	        for found in jsonpath_parsed.find(json_decoded)
	    ]

	# -- Code --
	def download_audio(url, path='.'):
	    """Download the first audio-only MP4 stream of a YouTube video.

	    Best-effort: any exception raised while resolving or downloading the
	    stream is printed as a traceback and swallowed, so a batch run can
	    continue with the next video.

	    Args:
	        url: URL of the video to download.
	        path: Output directory handed to the stream's ``download`` call.
	            Defaults to the current directory.
	    """
	    print('Downloading audio from {}...'.format(url))

	    try:
	        # Use the ``url`` argument (not a caller's global) and honour ``path``.
	        stream = YouTube(url) \
	            .streams \
	            .filter(only_audio=True, file_extension='mp4') \
	            .first()
	        if stream is None:
	            print('No audio-only mp4 stream found for {}.'.format(url))
	            return
	        stream.download(path)
	    except Exception:
	        traceback.print_exc()

	def download_video(url, path='.'):
	    """Download a progressive MP4 stream of a YouTube video.

	    The progressive MP4 streams are ordered by resolution, descending, and
	    the first one is downloaded into ``path``. Any exception is printed as
	    a traceback and swallowed.

	    Args:
	        url: URL of the video to download.
	        path: Output directory handed to the stream's ``download`` call.
	    """
	    print('Downloading video from {}...'.format(url))

	    try:
	        candidates = YouTube(url).streams.filter(
	            progressive=True, file_extension='mp4')
	        ranked = candidates.order_by('resolution').desc()
	        best = ranked.first()
	        best.download(path)
	    except Exception:
	        traceback.print_exc()

	# Main script: download every video of every playlist of the channel.
	playlists = get_channel_playlists(channel_name)

	# The media type never changes while running, so pick the routine once
	# instead of re-checking (and sleeping) for every single video.
	downloaders = {
	    'video': download_video,  # Download video (with audio)
	    'audio': download_audio,  # Download audio
	}
	download = downloaders.get(media_type)

	if download is None:
	    print('Media type not supported. Check "media_type" variable.')
	else:
	    for playlist in playlists:
	        yt_playlist = Playlist('https://www.youtube.com/playlist?list={}'.format(playlist['id']))
	        # Each playlist is saved into a directory named after its title.
	        dest_path = playlist['title']

	        # Read the URL list once so counting and iterating see the same data.
	        video_urls = list(yt_playlist.video_urls)

	        print('Downloading {} {}s from playlist...'.format(len(video_urls), media_type))

	        for video_url in video_urls:
	            download(video_url, dest_path)

	            # Throttle to avoid YouTube restriction (Too many requests)
	            time.sleep(3)