Created
December 26, 2018 17:49
-
-
Save ogurets/d68ac50d0fd4bcc441e69b0b51b665a2 to your computer and use it in GitHub Desktop.
Youtube WL list download method
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2.7 | |
# coding=utf-8 | |
""" | |
Ok, Google, your decision about removing WL from the API sucks! | |
""" | |
import ujson as json | |
import os | |
from tornado.httpclient import AsyncHTTPClient, HTTPClient, HTTPError, HTTPRequest | |
from tornado.ioloop import IOLoop | |
import selenium | |
from selenium import webdriver | |
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary | |
import urllib | |
import random | |
# FIREFOX | |
# Will not activate WebDriver plugin unless: | |
# xpinstall.signatures.required in about:config | |
# https://stackoverflow.com/questions/37247336/selenium-use-of-firefox-profile | |
# Doesn't work! | |
# Alternative: https://github.com/5digits/dactyl/wiki/Disable-extension-signing-requirement-in-Firefox-49-or-later | |
class DebugDump(object): | |
def __init__(self, filename): | |
self.fp = open(filename, 'wt') | |
def __del__(self): | |
self.fp.close() | |
def write(self, b): | |
self.fp.write(b) | |
FIREFOX_PROFILE_LOCATION = r'%APPDATA%\Mozilla\Firefox\Profiles\<some_profile>' | |
FIREFOX_BINARY_LOCATION = r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe' | |
def downloadWL(filename): | |
videolist = DebugDump(filename) | |
videolist.write('[') | |
http = HTTPClient() | |
profile = webdriver.FirefoxProfile(FIREFOX_PROFILE_LOCATION) # This profile is used as a template, not directly | |
binary = FirefoxBinary(FIREFOX_BINARY_LOCATION) | |
sDriver = webdriver.Firefox(profile, binary, capabilities={"marionette": False}) # https://selenium2.ru/news/188-firefox-esr-52.html | |
sDriver.get('https://www.youtube.com/playlist?list=WL') | |
# IMPORTANT! To reuse existing session/cookies, log in using the profile listed as template above! (using your browser, not python) | |
pass | |
# Prepare headers for Tornado | |
uagent = sDriver.execute_script("return navigator.userAgent;") | |
cookies = sDriver.get_cookies() | |
cookies = filter(lambda x: 'youtube.com' in x['domain'], cookies) | |
cookies = u'; '.join([u'{}={}'.format(x[u'name'], x[u'value']) for x in cookies]) | |
ytcfg = sDriver.execute_script('return ytcfg.data_;') | |
headers = { | |
'X-YouTube-Client-Name': '1', | |
'X-YouTube-Client-Version': str(ytcfg['INNERTUBE_CONTEXT_CLIENT_VERSION']), | |
'X-YouTube-Page-CL': str(ytcfg['PAGE_CL']), | |
'X-Youtube-Identity-Token': ytcfg['ID_TOKEN'], | |
'X-YouTube-Page-Label': ytcfg['PAGE_BUILD_LABEL'], # responseContext.serviceTrackingParams...key=innertube.build.label, | |
'X-YouTube-Variants-Checksum': str(ytcfg['VARIANTS_CHECKSUM']), | |
'X-YouTube-Utc-Offset': '180', | |
'X-SPF-Referer': 'https://www.youtube.com/playlist?list=WL', | |
'X-SPF-Previous': 'https://www.youtube.com/playlist?list=WL', | |
'Referer': 'https://www.youtube.com/playlist?list=WL', | |
'Cookie': cookies | |
} | |
def buildContURL(ct): | |
return u'https://www.youtube.com/browse_ajax?{}'.format(urllib.urlencode({'ctoken': ct['continuation'], 'continuation': ct['continuation'], 'itct': ct['clickTrackingParams']})) | |
moreurl = None | |
if 0: | |
# Old design | |
# Get titles already loaded | |
for vid in sDriver.find_elements_by_class_name('pl-video'): | |
print(u'{}: {}'.format(vid.get_attribute('data-title'), vid.get_attribute('data-video-id'))) | |
# Get link for more | |
morebtn = sDriver.find_element_by_class_name('load-more-button') | |
moreurl = u'https://www.youtube.com' + morebtn.get_attribute('data-uix-load-more-href') | |
else: | |
# New design | |
# JS video list: window["ytInitialData"].contents.twoColumnBrowseResultsRenderer.tabs[0].tabRenderer.content.sectionListRenderer.contents[0].itemSectionRenderer.contents[0].playlistVideoListRenderer.contents | |
# JS continuation: window["ytInitialData"].contents.twoColumnBrowseResultsRenderer.tabs[0].tabRenderer.content.sectionListRenderer.contents[0].itemSectionRenderer.contents[0].playlistVideoListRenderer.continuations[0].nextContinuationData | |
ct = sDriver.execute_script('return window["ytInitialData"].contents.twoColumnBrowseResultsRenderer.tabs[0].tabRenderer.content.sectionListRenderer.contents[0].itemSectionRenderer.contents[0].playlistVideoListRenderer.continuations[0].nextContinuationData;') | |
#moreurl = u'https://www.youtube.com/browse_ajax?ctoken={}&continuation={}&itct={}'.format(ct['continuation'], ct['continuation'], ct['clickTrackingParams']) | |
moreurl = buildContURL(ct) | |
while moreurl is not None: | |
try: | |
resp = http.fetch(HTTPRequest(moreurl, user_agent=uagent, headers=headers)) | |
js = json.loads(resp.body) | |
js = js[1]['response']['continuationContents']['playlistVideoListContinuation'] | |
for vid in js['contents']: | |
print(u'{}: {}'.format(vid['playlistVideoRenderer']['videoId'], vid['playlistVideoRenderer']['title']['simpleText'])) | |
if 'continuations' in js: | |
ct = js['continuations'][0]['nextContinuationData'] | |
moreurl = buildContURL(ct) | |
videolist.write(json.dumps(js['contents'])) | |
else: | |
moreurl = None | |
if moreurl is None: | |
videolist.write(']') | |
else: | |
videolist.write(', ') | |
except HTTPError as e: | |
# HTTPError is raised for non-200 responses; the response | |
# can be found in e.response. | |
print("Error: " + str(e)) | |
moreurl = None | |
except Exception as e: | |
# Other errors are possible, such as IOError. | |
print("Error: " + str(e)) | |
moreurl = None | |
sDriver.close() | |
def loadSavedWL(filename): | |
with open(filename, 'r') as fp: | |
js = json.load(fp) | |
videos = [] | |
for packet in js: | |
for vid in packet: | |
videos.append((vid['playlistVideoRenderer']['videoId'], vid['playlistVideoRenderer']['title']['simpleText'])) | |
return videos | |
if __name__ == '__main__': | |
# Load/update the list from Youtube | |
# downloadWL('videolist.txt') | |
# Load, shuffle and display | |
videos = loadSavedWL('videolist.txt') | |
# TODO: build html page with videos (and thumbnails) | |
random.shuffle(videos) | |
for (vid, title) in videos: | |
print(u'https://www.youtube.com/watch?v={}'.format(vid)) | |
print(u'\t{}'.format(title)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment