Last active
May 18, 2022 13:23
-
-
Save hhsprings/7bad5a4a07f89f7d818b4810504dae52 to your computer and use it in GitHub Desktop.
Import a YouTube playlist and generate a local HTML player page from it.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import io | |
import json | |
import re | |
import sys | |
import os | |
from urllib.request import urlretrieve | |
from urllib.request import unquote, quote | |
import bs4 | |
_MEDIALISTSCRIPTTMPL = "var medialist = {medialist};" | |
_HTMLTMPL = """\ | |
<html> | |
<head jang="ja"> | |
<meta charset="UTF-8"> | |
<title>{pagetitle}</title> | |
<link href="https://cdnjs.cloudflare.com/ajax/libs/tabulator/5.2.2/css/tabulator_site.min.css" | |
rel="stylesheet"> | |
<script | |
type="text/javascript" | |
src="https://cdnjs.cloudflare.com/ajax/libs/tabulator/5.2.2/js/tabulator.min.js"> | |
</script> | |
<script | |
type="text/javascript" | |
src="https://code.jquery.com/jquery-3.6.0.min.js"> | |
</script> | |
<script> | |
var play_on_select = false; | |
new URL(window.location.href).search.slice(1).split("&").forEach(function (p) {{ | |
let kv = p.split("="); | |
if (kv[0] == "play_on_select" && kv[1] == "1") {{ | |
play_on_select = true; | |
}} | |
}}); | |
</script> | |
<style> | |
body {{ | |
margin: 1pt !important; | |
}} | |
.tabulator-header {{ | |
font-size: 0.8em !important; | |
}} | |
.tabulator-cell {{ | |
font-size: 0.8em !important; | |
padding: 1px 4px !important; | |
}} | |
.tabulator-footer {{ | |
padding: 0 !important; | |
}} | |
.tabulator-pagenator {{ | |
padding: 0 !important; | |
}} | |
.tabulator-page {{ | |
font-size: xx-small !important; | |
}} | |
.button_playersize {{ | |
font-size: small !important; | |
padding: 1px 1.5px !important; | |
min-width: 6.5em; | |
}} | |
</style> | |
</head> | |
<body> | |
<table border=0 style="border-spacing: 0"> | |
<tr> | |
<td style='height: "100%"; vertical-align: top; min-width: 960'> | |
<div id="player"></div> | |
<!-- ### --> | |
<table border=0 style="border-spacing: 0"> | |
<tr> | |
<td> | |
<div style="margin-top: 1pt; line-height: 1.0;"> | |
<input id="s90" type="button" value="160 x 90" class="button_playersize"/> | |
<input id="s180" type="button" value="320 x 180" class="button_playersize"/> | |
<input id="s270" type="button" value="480 x 270" class="button_playersize"/> | |
<input id="s360" type="button" value="640 x 360" class="button_playersize"/> | |
<input id="s450" type="button" value="800 x 450" class="button_playersize"/> | |
<input id="s540" type="button" value="960 x 540" class="button_playersize"/> | |
<br/> | |
<input id="s630" type="button" value="1120 x 630" class="button_playersize"/> | |
<input id="s720" type="button" value="1280 x 720" class="button_playersize"/> | |
<input id="s810" type="button" value="1440 x 810" class="button_playersize"/> | |
<input id="s900" type="button" value="1600 x 900" class="button_playersize"/> | |
<input id="s990" type="button" value="1760 x 990" class="button_playersize"/> | |
<input id="s1080" type="button" value="1920 x 1080" class="button_playersize"/> | |
</div> | |
<div id="disp_stvid" style="margin-top: 4pt; line-height: 1.2 !important;"></div> | |
</td> | |
<td style="width: 0.5em;"></td> | |
<td> | |
<span id="clk1cont"></span> | |
</td> | |
<td style="min-width: 4em;"> | |
<span id="etoc_val" style="font-size: x-small"></span> | |
</td> | |
<td> | |
<span id="clk2cont"></span> | |
</td> | |
<td style="width: 0.5em;"></td> | |
<td style="line-height: 1.1 !important; vertical-align: bottom;"> | |
<!-- f, j, k, l, m, and 0-9 --> | |
<input id="pbr_value" | |
type="number" step="0.25" | |
min="0" max="2.0" value="1.0" | |
maxlength="4" | |
style="width: 5em" | |
/> | |
<input | |
id="pbr_btn" type="button" | |
value="↑↓" | |
title="setPlaybackRate" | |
accesskey="r" | |
/> | |
<br/> | |
<input | |
id="step_btn_bw" type="button" | |
value="<." | |
accesskey="a" | |
/> | |
<input id="step_value_sec" | |
type="number" step="0.1" | |
min="0" value="0.5" | |
maxlength="6" | |
style="width: 5em"/> | |
<input | |
id="step_btn_fw" type="button" | |
value=".>" | |
/> | |
</td> | |
</tr> | |
</table> | |
<!-- ### --> | |
</td> | |
<td style="width: 0.1em;" valign="top"> | |
<input id='toggle_display_playlist' type="button" value="-" style="height: 9em;"/> | |
</td> | |
<td id='playlist_container' style='height: "100%"; vertical-align: top;'> | |
<div id="playlist"></div> | |
</td> | |
</tr> | |
</table> | |
{medialist_script} | |
<script> | |
$(document).ready(function(){{ | |
$('input[type="number"]').on('keyup', function () {{ | |
let v = parseInt($(this).val()); | |
try {{ | |
min = parseInt($(this).attr('min')); | |
if (v < min) {{ | |
v = min; | |
$(this).val(v); | |
}} | |
}} catch (ex) {{ | |
}} | |
try {{ | |
max = parseInt($(this).attr('max')); | |
if (v > max){{ | |
v = max; | |
$(this).val(v); | |
}} | |
}} catch (ex) {{ | |
}} | |
}}) | |
}}) | |
$('#toggle_display_playlist').on('click', function (ev) {{ | |
let disp = $('#playlist_container').css('display'); | |
if (disp === 'none') {{ | |
$('#playlist_container').css('display', 'block'); | |
$('#toggle_display_playlist').val('-'); | |
}} else {{ | |
$('#playlist_container').css('display', 'none'); | |
$('#toggle_display_playlist').val('+'); | |
}} | |
}}); | |
function zeroPad(num, places) {{ | |
var zero = places - num.toString().length + 1; | |
return Array(+(zero > 0 && zero)).join("0") + num; | |
}} | |
function tsToTss(ts, frac) {{ | |
ts = parseFloat(ts).toFixed(frac); | |
let spl = ("" + ts).split(/\./); | |
let sign = ts > 0 ? "" : "-"; | |
let fp = spl[1]; | |
if (fp) {{ | |
fp = "." + fp; | |
}} else {{ | |
fp = ""; | |
}} | |
let d = parseInt(spl[0]); | |
d = Math.abs(d); | |
let ss_h = parseInt(d / 3600); | |
d -= ss_h * 3600; | |
let ss_m = parseInt(d / 60); | |
d -= ss_m * 60; | |
let ss_s = parseInt(d); | |
return (sign + ss_h + ":" + zeroPad(ss_m, 2) + ":" + zeroPad(ss_s, 2)) + fp; | |
}} | |
function drawClock(cntid, dt) {{ | |
let cid = cntid.replace("cont", ""); | |
let prv = document.getElementById(cid); | |
if (prv) {{ | |
prv.remove(); | |
}} | |
let canvas = document.createElement('canvas'); | |
canvas.id = cid; | |
canvas.width = "60"; | |
canvas.height = "60"; | |
let cntn = document.getElementById(cntid); | |
cntn.parentNode.insertBefore(canvas, cntn); | |
let ctx = canvas.getContext("2d"); | |
let radius = canvas.height / 2; | |
ctx.translate(radius, radius); | |
radius = radius * 0.90; | |
function _drawClock(ctx, radius, hour, minute, second) {{ | |
(function () {{ | |
ctx.beginPath(); | |
ctx.arc(0, 0, radius, 0, 2 * Math.PI); | |
ctx.fillStyle = 'white'; | |
ctx.fill(); | |
let grad = ctx.createRadialGradient(0, 0, radius * 0.95, 0, 0, radius * 1.05); | |
grad.addColorStop(0, '#333'); | |
grad.addColorStop(0.5, 'white'); | |
grad.addColorStop(1, '#333'); | |
ctx.strokeStyle = grad; | |
ctx.lineWidth = radius * 0.1; | |
ctx.stroke(); | |
ctx.beginPath(); | |
ctx.arc(0, 0, radius * 0.1, 0, 2 * Math.PI); | |
ctx.fillStyle = '#333'; | |
ctx.fill(); | |
}})(); | |
(function () {{ | |
function drawHand(pos, length, width) {{ | |
ctx.beginPath(); | |
ctx.lineWidth = width; | |
ctx.lineCap = "round"; | |
ctx.moveTo(0, 0); | |
ctx.rotate(pos); | |
ctx.lineTo(0, -length); | |
ctx.stroke(); | |
ctx.rotate(-pos); | |
}} | |
hour = hour % 12; | |
hour = (hour * Math.PI / 6) + (minute * Math.PI / (6 * 60)) + (second * Math.PI / (360 * 60)); | |
drawHand(hour, radius * 0.5, radius * 0.07); | |
minute = (minute * Math.PI / 30) + (second * Math.PI / (30 * 60)); | |
drawHand(minute, radius * 0.8, radius * 0.07); | |
/*second = (second * Math.PI / 30);*/ | |
/*drawHand(second, radius * 0.9, radius * 0.02);*/ | |
}})(); | |
(function () {{ | |
ctx.font = radius * 2 * 0.15 + "px arial"; | |
ctx.textBaseline = "middle"; | |
ctx.textAlign = "center"; | |
for (let num = 1; num <= 12; ++num){{ | |
let ang = num * Math.PI / 6; | |
ctx.rotate(ang); | |
ctx.translate(0, -radius * 0.75); | |
ctx.rotate(-ang); | |
ctx.fillText(num.toString(), 0, 0); | |
ctx.rotate(ang); | |
ctx.translate(0, radius * 0.75); | |
ctx.rotate(-ang); | |
}} | |
}})(); | |
ctx.translate(0, 0); | |
ctx.rotate(0); | |
}} | |
_drawClock(ctx, radius, dt.getHours(), dt.getMinutes(), dt.getSeconds()); | |
}} | |
// To understand youtube's IFrame Player API, | |
// see "https://developers.google.com/youtube/iframe_api_reference", | |
// and "https://developers.google.com/youtube/player_parameters". | |
var tag = document.createElement('script'); | |
tag.src = "https://www.youtube.com/iframe_api"; | |
var firstScriptTag = document.getElementsByTagName('script')[0]; | |
firstScriptTag.parentNode.insertBefore(tag, firstScriptTag); | |
/* | |
* player: global player instance, which will be created on | |
* onYouTubeIframeAPIReady. | |
*/ | |
var player; | |
var dispstTimerid = null; | |
function dispPlayerStatus(upd) {{ | |
let st = player.getPlayerState(); | |
stt = ""; | |
stt += "STATUS: "; | |
if (st == 3 || st == -1) {{ | |
stt += '<span style="color: #F00; font-weight: bold">'; | |
}} | |
stt += '<span style="display: inline-block; width: 1.5em; text-align: right">' + st + "</span>"; | |
if (st == 3 || st == -1) {{ | |
stt += '</span>'; | |
}} | |
stt += ", "; | |
stt += "QUALITY: " + player.getPlaybackQuality() + ", "; | |
let pbr = player.getPlaybackRate(); | |
if (upd) {{ | |
$('#pbr_value').val(pbr); | |
}} | |
let ct = player.getCurrentTime(); | |
let dur = player.getDuration(); | |
let pospct = (100 * ct / dur); | |
stt += "POS: " + tsToTss(ct, 0) + "(" + pospct.toFixed(1) + "%), "; | |
let restsec = (dur - ct); | |
stt += "REST: " + tsToTss(restsec, 0); | |
let now = new Date(); | |
let etoc = (new Date(now.getTime() + (restsec / pbr) * 1000)); | |
drawClock("clk1cont", now); | |
drawClock("clk2cont", etoc); | |
$('#etoc_val').html( | |
"<span style='color: #aaa'>+ " + tsToTss(restsec / pbr, 0) + "</span>" + | |
"<br/> → " + etoc.toLocaleTimeString()); | |
stt = stt.replace( | |
/([A-Z_]+:)/g, | |
'<span style="font-size: x-small; color: #C0C0C0; text-decoration: underline">$1</span>') | |
stt = stt.replace( | |
/(\[.+\])/, | |
'<span style="color: #C0C0C0">$1</span>') | |
$('#disp_stvid').html( | |
'<span style="font-size: small;">' + stt + '</span>'); | |
if (dispstTimerid != null) {{ | |
clearInterval(dispstTimerid); | |
dispstTimerid = null; | |
}} | |
let intv = 60000; | |
if (st == 1 || st == 3) {{ | |
intv = 5000 / pbr; | |
}} | |
dispstTimerid = setInterval(function () {{ | |
dispPlayerStatus(false); | |
}}, intv); | |
}} | |
function onYouTubeIframeAPIReady() {{ | |
player = new YT.Player('player', {{ | |
width: "960", | |
height: "540", | |
playerVars: {{ | |
rel: 0, | |
}}, | |
// videoId will be set in setupPlaylist(). | |
events: {{ | |
"onReady": function (ev) {{ | |
setupPlaylist(); | |
}}, | |
"onStateChange": function (ev) {{ | |
dispPlayerStatus(true); | |
}}, | |
"onPlaybackQualityChange": function (ev) {{ | |
dispPlayerStatus(true); | |
}}, | |
"onPlaybackRateChange": function (ev) {{ | |
dispPlayerStatus(true); | |
}}, | |
}} | |
}}); | |
}} | |
// | |
function setupPlaylist() {{ | |
var table = new Tabulator("#playlist", {{ | |
"columnDefaults": {{ | |
"tooltip": function (e, cell, onRendered) {{ | |
// e - mouseover event | |
// cell - cell component | |
// onRendered - onRendered callback registration function | |
var el = document.createElement("div"); | |
let rowdat = cell.getData(); | |
let thumb = '<img src="' + rowdat["thumb"] + '" width="368" height="207" />'; | |
let dur = "[" + rowdat["duration"] + "]"; | |
let tit = '<div style="max-width: 36em;">' + rowdat["title"] + "</div>"; | |
el.innerHTML = thumb + dur + tit; | |
return el; | |
}}, | |
}}, | |
/*"layout": "fitDataStretch",*/ | |
"selectable": 1, | |
"pagination": "local", | |
"paginationSize": 28, | |
"paginationButtonCount": 10, | |
"headerFilterLiveFilterDelay": 1800, | |
"height": "920px", | |
"index": "videoid", | |
"columns": [ | |
{{ | |
"field": "title", | |
/*"frozen": true, */ | |
"title": "title", | |
"headerFilter": "input", | |
"headerFilterFunc": "regex", | |
"width": "67px", | |
"headerSortTristate": true, | |
}}, | |
{{ | |
"field": "thumb", | |
"formatter": "image", | |
"formatterParams": {{"width": 48, "height": 27}}, | |
"headerSortTristate": true, | |
}}, | |
{{ | |
"field": "duration", | |
"title": "duration", | |
"headerSortTristate": true, | |
}} | |
], | |
}}); | |
var first_select = true; | |
table.on("rowSelected", function(row) {{ | |
var videoid = row.getData().videoid; | |
if (play_on_select && !first_select) {{ | |
player.loadVideoById(videoid); | |
}} else {{ | |
player.cueVideoById(videoid); | |
}} | |
}}); | |
table.on("tableBuilt", function () {{ | |
table.setData(medialist); | |
table.selectRow(medialist[0]["videoid"]); | |
first_select = false; | |
}}); | |
function _setiframesize(s) {{ | |
player.setSize(parseInt(s[0]), parseInt(s[1])); | |
}}; | |
$('#s90').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s180').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s270').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s360').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s450').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s540').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s630').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s720').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s810').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s900').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s990').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#s1080').on("click", function(ev) {{ | |
_setiframesize(ev.target.value.split(" x ")); | |
}}); | |
$('#pbr_btn').on("click", function(ev) {{ | |
let pbr = $('#pbr_value').val(); | |
player.setPlaybackRate(parseFloat(pbr)); | |
}}); | |
$('#step_btn_bw').on("click", function(ev) {{ | |
let sv = $('#step_value_sec').val(); | |
let t = player.getCurrentTime() - parseFloat(sv); | |
player.seekTo(t); | |
}}); | |
$('#step_btn_fw').on("click", function(ev) {{ | |
let sv = $('#step_value_sec').val(); | |
let t = player.getCurrentTime() + parseFloat(sv); | |
player.seekTo(t); | |
}}); | |
}} | |
</script> | |
</body> | |
</html>""" | |
try: | |
from youtubesearchpython import VideosSearch, Playlist, Search | |
def _getplitems_ytsp(dejav, url): | |
if "list=" in url: | |
for item in Playlist.get(url)["videos"]: | |
vid = item["id"] | |
if vid in dejav: | |
continue | |
dejav.add(vid) | |
yield ({ | |
"title": item["title"], | |
"duration": item["duration"], | |
"videoId": vid, | |
"thumbnail": item["thumbnails"][0]["url"] | |
}, False) | |
else: | |
if "search_query=" in url: | |
qs = url.partition("search_query=")[-1] | |
search = VideosSearch(unquote(qs)) | |
else: | |
search = Search(url) | |
while True: | |
res = search.result()['result'] | |
if not res: | |
break | |
for item in (item for item in res if item["type"] == "video"): | |
vid = item["id"] | |
if vid in dejav: | |
continue | |
dejav.add(vid) | |
#print(json.dumps(item, indent=4, ensure_ascii=False)) | |
yield ({ | |
"title": item["title"], | |
"duration": item["duration"], | |
"videoId": vid, | |
"thumbnail": item["thumbnails"][0]["url"] | |
}, False) | |
search.next() | |
except ImportError: | |
_getplitems_ytsp = None | |
def _getplitems(dejav, url):
    """Yield ``(item, pa)`` pairs for the videos reachable from `url`.

    dejav: set of video ids already emitted (shared between calls); it is
           updated in place and known ids are skipped.
    url:   URL of the page to import.

    First the optional ``_getplitems_ytsp`` backend is tried; its failures are
    reported on stderr and otherwise ignored. After that the page itself is
    always downloaded and the JSON assigned to ``var ytInitialData`` in one of
    its <script> elements is scanned for dicts having a "videoId" key, so
    videos the backend did not report are still picked up.

    item is a dict with the keys "title", "duration", "videoId" and
    "thumbnail"; pa is true when the video was found below a "shelfRenderer"
    key (such items are dropped later by ``_dump``).
    """
    try:
        if _getplitems_ytsp:
            yield from _getplitems_ytsp(dejav, url)
    except Exception as e:
        #import traceback
        #print(traceback.format_exc(), file=sys.stderr)
        print(e, file=sys.stderr)

    fn, _headers = urlretrieve(url)
    with io.open(fn, encoding="utf-8") as fi:
        htcont = fi.read()
    soup = bs4.BeautifulSoup(htcont, features="html.parser")

    def _vritem(pvr):
        # Convert one dict having "videoId" into an item, or return None when
        # it does not look like a video entry or was already seen.
        #print(json.dumps(pvr, indent=4, ensure_ascii=False), end="\n\n\n")
        if "title" not in pvr:
            return None
        tit = pvr["title"]
        try:
            if "runs" in tit:
                tit = tit["runs"][0]["text"]
            else:
                tit = tit["simpleText"]
        except (KeyError, IndexError, TypeError):
            # Unknown title layout: skip this entry instead of aborting.
            return None
        dur = ""
        if "lengthText" in pvr:
            dur = pvr["lengthText"]["simpleText"]
        elif "thumbnailOverlays" in pvr:
            try:
                dur = pvr["thumbnailOverlays"][0]["thumbnailOverlayTimeStatusRenderer"]
                dur = dur["text"]["simpleText"]
            except Exception:
                # i think this case is not what we want
                return None
        vid = pvr["videoId"]
        if vid in dejav:
            return None
        dejav.add(vid)
        thb = ""
        if "thumbnail" in pvr:
            try:
                thb = pvr["thumbnail"]["thumbnails"][0]["url"]
            except (KeyError, IndexError, TypeError):
                thb = ""
        return {
            "title": tit,
            "duration": dur,
            "videoId": vid,
            "thumbnail": thb
        }

    def _getvideoitemelem(d, pa):
        # Depth-first search for dicts having a "videoId" key. `pa` becomes
        # true only for the subtree below a "shelfRenderer" key; it is not
        # carried over to the sibling keys of the same dict.
        if isinstance(d, (list,)):
            for c in d:
                yield from _getvideoitemelem(c, pa)
        elif isinstance(d, (dict,)):
            if "videoId" in d:
                yield d, pa
            else:
                for k, v in d.items():
                    yield from _getvideoitemelem(v, pa or k == "shelfRenderer")

    ndp = "var ytInitialData = "
    for scr in soup.find_all("script"):
        children = list(scr.children)
        if not children:
            continue
        s = str(children[0])
        pos = s.find(ndp)
        if pos < 0:
            continue
        # Decode exactly one JSON value following the assignment; whatever
        # follows it (normally ";") is ignored.
        d, _end = json.JSONDecoder().raw_decode(s[pos + len(ndp):].lstrip())
        for c, pa in _getvideoitemelem(d, False):
            item = _vritem(c)
            if item:
                yield [item, pa]
def _load_origdata(ofnhtml, ofnjs): | |
cont = None | |
if os.path.exists(ofnjs): | |
cont = io.open(ofnjs, encoding="utf-8").read() | |
cont = cont[len("var medialist = "):cont.rindex(";")] | |
elif os.path.exists(ofnhtml): | |
temp = io.open(ofnhtml, encoding="utf-8").read() | |
m = re.search('<script type="text/javascript">\nvar medialist =', temp) | |
if m: | |
cont = temp[m.end():] | |
cont = cont[:re.search(";\r?\n</script>", cont).start()] | |
if cont: | |
return json.loads(cont) | |
return [] | |
def _dump(
        pagetitle, title_pattern, exclude_title_patterns,
        res, ofnbase,
        separate_datafile, update_datafile):
    """Write the player page (and optionally its data file).

    pagetitle:              text for <title>; "&index=N" is removed and the
                            part after the first "?" is percent-decoded.
    title_pattern:          compiled regex; only items whose title matches
                            (``match``) are kept.
    exclude_title_patterns: compiled regexes; an item is dropped when any of
                            them is found (``search``) in its title.
    res:                    iterable of ``(item, pa)`` pairs; items with a
                            true `pa` are dropped.
    ofnbase:                output name, with or without ".html".
    separate_datafile:      write the playlist into "<base>.js" and reference
                            it from the page instead of embedding it.
    update_datafile:        start from the playlist of a previous run (see
                            ``_load_origdata``) and append new videos to it.
    """
    import html  # local import: only this function needs it

    pagetitle = re.sub(r"\&index=\d+", "", pagetitle)
    head, sep, query = pagetitle.partition("?")
    pagetitle = head + sep + unquote(query)

    if ofnbase.endswith(".html"):
        ofnhtml = ofnbase
        ofnjs = os.path.splitext(ofnbase)[0] + ".js"
    else:
        ofnhtml = ofnbase + ".html"
        ofnjs = ofnbase + ".js"

    medialist = []
    if update_datafile:
        medialist.extend(_load_origdata(ofnhtml, ofnjs))
    # The page uses "videoid" as the table index, so a video is identified by
    # its id alone; comparing whole entries would duplicate a video as soon as
    # its title or thumbnail URL differs from the stored one.
    known = {m.get("videoid") for m in medialist if isinstance(m, dict)}
    for item, pa in res:
        title = item["title"]
        if not title_pattern.match(title):
            continue
        if any(excl.search(title) for excl in exclude_title_patterns):
            continue
        if pa:
            continue
        vid = item["videoId"]
        if vid in known:
            continue
        known.add(vid)
        medialist.append(dict(
            #people_also=pa,
            videoid=vid,
            title=title,
            duration=item["duration"],
            thumb=item["thumbnail"]))

    # "</" can only occur inside JSON strings; writing it as "<\/" is the same
    # string for both JSON and JavaScript but can no longer terminate the
    # surrounding <script> element.
    medialist_json = json.dumps(
        medialist,
        ensure_ascii=False, indent=2).replace("</", "<\\/")
    medialist_script_cont = _MEDIALISTSCRIPTTMPL.format(
        medialist=medialist_json)
    #
    if not separate_datafile:
        # _load_origdata() searches for this exact opening when updating.
        medialist_script = """<script type="text/javascript">
{medialist_script}
</script>
""".format(medialist_script=medialist_script_cont)
    else:
        # The data file is written next to the html file, so the page must
        # refer to it by its file name only, not by the output path.
        medialist_script = '<script type="text/javascript" src="{}"></script>'.format(
            quote(os.path.basename(ofnjs)))
        with io.open(ofnjs, "w", encoding="utf-8", newline="\n") as fo:
            print(medialist_script_cont, file=fo)
    #
    with io.open(ofnhtml, "w", encoding="utf-8", newline="\n") as fo:
        print(_HTMLTMPL.format(
            pagetitle=html.escape(pagetitle),
            medialist_script=medialist_script), file=fo)
def _urlmap(s): | |
from urllib.parse import urlsplit | |
comps = urlsplit(s) | |
if not comps.scheme: | |
return "https://www.youtube.com/playlist?list=" + s | |
return s | |
def _allplaylists(url):
    """Yield a playlist URL for every playlist id found on the page at `url`.

    The page is downloaded and the JSON assigned to ``var ytInitialData`` in
    one of its <script> elements is scanned for dicts having a "playlistId"
    key. Each id is reported once, in order of first appearance.
    """
    base = "https://www.youtube.com/playlist?list="
    fn, _headers = urlretrieve(url)
    with io.open(fn, encoding="utf-8") as fi:
        htcont = fi.read()
    soup = bs4.BeautifulSoup(htcont, features="html.parser")

    def _getplistitemelem(d):
        # Depth-first search for dicts having a "playlistId" key.
        if isinstance(d, (list,)):
            for c in d:
                yield from _getplistitemelem(c)
        elif isinstance(d, (dict,)):
            if "playlistId" in d:
                yield d
            else:
                for v in d.values():
                    yield from _getplistitemelem(v)

    ndp = "var ytInitialData = "
    seen = set()
    for scr in soup.find_all("script"):
        children = list(scr.children)
        if not children:
            continue
        s = str(children[0])
        pos = s.find(ndp)
        if pos < 0:
            continue
        # Decode exactly one JSON value following the assignment; whatever
        # follows it (normally ";") is ignored.
        d, _end = json.JSONDecoder().raw_decode(s[pos + len(ndp):].lstrip())
        for r in _getplistitemelem(d):
            plid = r.get("playlistId")
            # Skip non-text ids and ids already reported.
            if not isinstance(plid, str) or plid in seen:
                continue
            seen.add(plid)
            yield base + plid
if __name__ == '__main__':
    # Command line entry point: collect the videos of every given playlist /
    # page / search and write them into one player page.
    import argparse

    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--argtype",
        choices=["", "search_words"],
        default="")
    ap.add_argument("arg", nargs="+")
    ap.add_argument(
        "--outfilebase",
        default=os.path.splitext(os.path.basename(sys.argv[0]))[0])
    ap.add_argument(
        "--separate_datafile",
        action="store_true")
    ap.add_argument(
        "--update_datafile",
        action="store_true")
    # The title filters come either from the command line or from a config
    # file. argparse does not support argument groups nested inside a
    # mutually exclusive group, so the options are declared directly on the
    # parser and the exclusion is checked after parsing.
    ap.add_argument(
        "--title_pattern")
    ap.add_argument(
        "--exclude_title_pattern",
        action="append")
    # ---
    # {"match": ".*", "excludes": ["aaa", "bbb"]}
    # ---
    ap.add_argument("--pattern_config")
    args = ap.parse_args()
    if args.pattern_config and (
            args.title_pattern or args.exclude_title_pattern):
        ap.error(
            "--pattern_config cannot be combined with "
            "--title_pattern / --exclude_title_pattern")

    tit = []
    result = []
    title_pattern = ".*"
    exclude_title_patterns = []
    if args.pattern_config:
        with io.open(args.pattern_config, encoding="utf-8") as fi:
            cfg = json.load(fi)
        title_pattern = cfg.get("match", ".*")
        for p in cfg.get("excludes", []):
            exclude_title_patterns.append(re.compile(p, flags=re.I))
    else:
        if args.title_pattern:
            title_pattern = args.title_pattern
        for p in (args.exclude_title_pattern or []):
            exclude_title_patterns.append(re.compile(p, flags=re.I))
    title_pattern = re.compile(title_pattern, flags=re.I)

    # Video ids already collected, shared by all URLs.
    dejav = set()
    if args.argtype == "search_words":
        b = "https://www.youtube.com/results?search_query="
        rawurls = [b + "+".join([quote(c.encode("utf-8")) for c in " ".join(args.arg).split()])]
    else:
        rawurls = [_urlmap(a) for a in args.arg]
    # A ".../playlists" page is expanded into the playlists it lists.
    urls = []
    for url in rawurls:
        if url.endswith("/playlists"):
            urls.extend(_allplaylists(url))
        else:
            urls.append(url)
    for url in urls:
        tit.append(url.rpartition("/")[-1])
        result.extend(filter(None, _getplitems(dejav, url)))
    _dump(
        ", ".join(tit),
        title_pattern,
        exclude_title_patterns,
        result,
        args.outfilebase,
        args.separate_datafile,
        args.update_datafile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment