Last active
March 25, 2025 22:00
-
-
Save boydaihungst/c7c617eb0d7f09aba71a10c47be4282d to your computer and use it in GitHub Desktop.
yt download script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript== | |
// @name Youtube Subtitle Downloader v36 | |
// @description Download Subtitles | |
// @include https://*youtube.com/* | |
// @author Cheng Zheng | |
// @copyright 2009 Tim Smart; 2011 gw111zz; 2014~2023 Cheng Zheng; | |
// @license GNU GPL v3.0 or later. http://www.gnu.org/copyleft/gpl.html | |
// @require https://code.jquery.com/jquery-1.12.4.min.js | |
// @version 36 | |
// @grant GM_xmlhttpRequest | |
// @grant unsafeWindow | |
// @namespace https://greasyfork.org/users/5711 | |
// @downloadURL https://update.greasyfork.org/scripts/5368/Youtube%20Subtitle%20Downloader%20v36.user.js | |
// @updateURL https://update.greasyfork.org/scripts/5368/Youtube%20Subtitle%20Downloader%20v36.meta.js | |
// ==/UserScript== | |
/* | |
[What is this?] | |
This Tampermonkey script allows you to download Youtube "Automatic subtitle" and "closed subtitle". | |
[Note] | |
If it doesn't work (rarely), try to refresh the page. | |
If problem still exists after refreshing, send an email to [email protected]. | |
[Who built this?] | |
Author : Cheng Zheng (郑诚) | |
Email : [email protected] | |
Github : https://github.com/1c7/Youtube-Auto-Subtitle-Download | |
[Note for Developers] | |
1. Some comments are written in Chinese. | |
2. This code handles both "Auto" and "Closed" subtitles. | |
[Test Video] | |
https://www.youtube.com/watch?v=bkVsus8Ehxs | |
This videos only has a closed English subtitle, with no auto subtitles. | |
https://www.youtube.com/watch?v=-WEqFzyrbbs | |
no subtitle at all | |
https://www.youtube.com/watch?v=9AzNEG1GB-k | |
have a lot of subtitles | |
https://www.youtube.com/watch?v=tqGkOvrKGfY | |
1:36:33 super long subtitle | |
[How does it work?] | |
The code can be roughly divided into three parts: | |
1. Add a button on the page. (UI) | |
2. Detect if subtitle exists. | |
3. Convert subtitle format, then download. | |
[Test Enviroment] | |
Works best on Chrome + Tampermonkey. | |
There are plenty Chromium-based Browser, I do not guarantee this work on all of them; | |
备注: | |
有时候不能用,是因为 jQuery 的 CDN 无法载入,解决办法是修改这一行 | |
// @require https://code.jquery.com/jquery-1.12.4.min.js | |
改成一个别的 jQuery 地址,比如 | |
https://cdn.bootcdn.net/ajax/libs/jquery/1.12.4/jquery.js | |
https://cdn.staticfile.org/jquery/1.12.4/jquery.min.js | |
更新日志 | |
## 2022年12月23号:升级到 v35 | |
常规升级。把下载框挪到标题下面,之前太靠下了(放到了描述的下面)现在挪上去一点。 | |
*/ | |
;(function () { | |
// Config | |
var NO_SUBTITLE = 'No Subtitle' | |
var HAVE_SUBTITLE = 'Download Subtitles' | |
var TEXT_LOADING = 'Loading...' | |
const BUTTON_ID = | |
'youtube-subtitle-downloader-by-1c7-latest-update-2022-decemeber-23' | |
// Config | |
var HASH_BUTTON_ID = `#${BUTTON_ID}` | |
// initialize | |
var first_load = true // indicate if first load this webpage or not | |
var youtube_playerResponse_1c7 = null // for auto subtitle | |
unsafeWindow.caption_array = [] // store all subtitle | |
$(document).ready(function () { | |
make_sure_it_load_properly_before_continue() | |
}) | |
async function wait_until_element_exists(element_identifier) { | |
var retry_count = 0 | |
var RETRY_LIMIT = 50 | |
return new Promise(function (resolve, reject) { | |
var intervalID = setInterval(function () { | |
try { | |
var element = document.querySelector(element_identifier) | |
if (element != null) { | |
resolve(true) | |
} else { | |
retry_count = retry_count + 1 | |
// console.log(`重试次数 ${retry_count}`); | |
if (retry_count > RETRY_LIMIT) { | |
clearInterval(intervalID) | |
reject(false) | |
} | |
} | |
} catch (error) { | |
reject(false) | |
} | |
}, 330) | |
}) | |
} | |
async function make_sure_it_load_properly_before_continue() { | |
var id = new_Youtube_2022_UI_element_identifier() | |
var result = await wait_until_element_exists(id) | |
if (result) { | |
init_UI() | |
} | |
} | |
// trigger when loading new page | |
// (actually this would also trigger when first loading, that's not what we want, that's why we need to use firsr_load === false) | |
// (new Material design version would trigger this "yt-navigate-finish" event. old version would not.) | |
var body = document.getElementsByTagName('body')[0] | |
body.addEventListener('yt-navigate-finish', function (event) { | |
// 2021-8-9 测试结果:yt-navigate-finish 可以正常触发 | |
if (current_page_is_video_page() === false) { | |
return | |
} | |
youtube_playerResponse_1c7 = event.detail.response.playerResponse // for auto subtitle | |
unsafeWindow.caption_array = [] // clean up (important, otherwise would have more and more item and cause error) | |
// if use click to another page, init again to get correct subtitle | |
if (first_load === false) { | |
remove_subtitle_download_button() | |
init_UI() | |
} | |
}) | |
// 我们用这个元素判断是不是 2022 年新 UI 。 | |
// return Element; | |
function new_Youtube_2022_UI_element() { | |
return document.querySelector(new_Youtube_2022_UI_element_identifier()) | |
} | |
function new_Youtube_2022_UI_element_identifier() { | |
var document_querySelector = '#owner.item.style-scope.ytd-watch-metadata' | |
return document_querySelector | |
} | |
// return true / false | |
// Detect [new version UI(material design)] OR [old version UI] | |
// I tested this, accurated. | |
function new_material_design_version() { | |
var old_title_element = document.getElementById('watch7-headline') | |
if (old_title_element) { | |
return false | |
} else { | |
return true | |
} | |
} | |
// return true / false | |
function current_page_is_video_page() { | |
return get_url_video_id() !== null | |
} | |
// return string like "RW1ChiWyiZQ", from "https://www.youtube.com/watch?v=RW1ChiWyiZQ" | |
// or null | |
function get_url_video_id() { | |
return getURLParameter('v') | |
} | |
//https://stackoverflow.com/questions/11582512/how-to-get-url-parameters-with-javascript/11582513#11582513 | |
function getURLParameter(name) { | |
return ( | |
decodeURIComponent( | |
(new RegExp('[?|&]' + name + '=' + '([^&;]+?)(&|#|;|$)').exec( | |
location.search | |
) || [null, ''])[1].replace(/\+/g, '%20') | |
) || null | |
) | |
} | |
function remove_subtitle_download_button() { | |
$(HASH_BUTTON_ID).remove() | |
} | |
// 初始化 | |
function init_UI() { | |
var html_element = get_main_UI_element() | |
// 旧版 UI | |
var old_anchor_element = document.getElementById('watch7-headline') | |
if (old_anchor_element != null) { | |
old_anchor_element.appendChild(html_element) | |
} | |
// 新版 UI | |
var anchor = document.querySelector('#above-the-fold #title') | |
if (anchor) { | |
anchor.appendChild(html_element) | |
} | |
first_load = false | |
} | |
function get_main_UI_element() { | |
var div = document.createElement('div'), | |
select = document.createElement('select'), | |
option = document.createElement('option'), | |
downloadBtn = document.createElement('button') | |
var css_div = `display: table; | |
margin-top:4px; | |
border: 1px solid rgb(0, 183, 90); | |
cursor: pointer; color: rgb(255, 255, 255); | |
border-top-left-radius: 3px; | |
border-top-right-radius: 3px; | |
border-bottom-right-radius: 3px; | |
border-bottom-left-radius: 3px; | |
background-color: #00B75A; | |
` | |
div.setAttribute('style', css_div) | |
div.id = BUTTON_ID | |
select.id = 'captions_selector' | |
select.disabled = true | |
let css_select = `display:block; | |
border: 1px solid rgb(0, 183, 90); | |
cursor: pointer; | |
color: rgb(255, 255, 255); | |
background-color: #00B75A; | |
padding: 4px; | |
` | |
select.setAttribute('style', css_select) | |
option.textContent = TEXT_LOADING | |
option.selected = true | |
select.appendChild(option) | |
// 下拉菜单里,选择一项后触发下载 | |
// select.addEventListener( | |
// 'change', | |
// function () { | |
// download_subtitle(this) | |
// }, | |
// false | |
// ) | |
downloadBtn.id = 'captions_download_btn' | |
downloadBtn.setAttribute('style', `display:inline-block; | |
border: 1px solid rgb(0, 183, 90); | |
cursor: pointer; | |
color: rgb(255, 255, 255); | |
background-color: #00B75A; | |
padding: 4px; | |
`) | |
downloadBtn.innerText="Download sub" | |
downloadBtn.addEventListener( | |
'click', | |
function () { | |
download_subtitle(select) | |
}, | |
false | |
) | |
div.appendChild(select) // put <select> into <div> | |
div.appendChild(downloadBtn) | |
// put the div into page: new material design | |
var title_element = document.querySelectorAll( | |
'.title.style-scope.ytd-video-primary-info-renderer' | |
) | |
if (title_element) { | |
$(title_element[0]).after(div) | |
} | |
load_language_list(select) | |
// <a> element is for download | |
var a = document.createElement('a') | |
a.style.cssText = 'display:none;' | |
a.setAttribute('id', 'ForSubtitleDownload') | |
var body = document.getElementsByTagName('body')[0] | |
body.appendChild(a) | |
return div | |
} | |
// trigger when user select <option> | |
async function download_subtitle(selector) { | |
// if user select first <option>, we just return, do nothing. | |
if (selector.selectedIndex == 0) { | |
return | |
} | |
var caption = caption_array[selector.selectedIndex - 1] | |
// because first <option> is for display, so index - 1 | |
var result = null | |
var filename = null // 保存文件名 | |
// if user choose auto subtitle | |
if (caption.lang_code == 'AUTO') { | |
result = await get_auto_subtitle() | |
filename = get_file_name(get_auto_subtitle_name()) | |
} else { | |
// closed subtitle | |
let lang_code = caption.lang_code | |
let lang_name = caption.lang_name | |
result = await get_closed_subtitle(lang_code) | |
filename = get_file_name(lang_name) | |
} | |
let srt = parse_youtube_XML_to_SRT(result) | |
downloadString(srt, 'text/plain', filename) | |
// After download, select first <option> | |
// selector.options[0].selected = true | |
} | |
// Return something like: "(English)How Did Python Become A Data Science Powerhouse?.srt" | |
function get_file_name(x) { | |
// var method_1 = '(' + x + ')' + document.title + '.srt'; // 如果有通知数,文件名也会带上,比较烦,这种方式不好 | |
// var method_2 = '(' + x + ')' + get_title() + '.srt'; | |
var method_3 = `(${x})${get_title().replace('#', '')}_video_id_${get_video_id()}.srt` | |
return method_3 | |
} | |
// 拿完整字幕的 XML | |
// async function get_closed_subtitles() { | |
// var list_url = 'https://video.google.com/timedtext?hl=en&v=' + get_url_video_id() + '&type=list'; | |
// // Example: https://video.google.com/timedtext?hl=en&v=if36bqHypqk&type=list | |
// return new Promise(function (resolve, reject) { | |
// GM_xmlhttpRequest({ | |
// method: 'GET', | |
// url: list_url, | |
// onload: function (xhr) { | |
// resolve(xhr.responseText) | |
// } | |
// }) | |
// }) | |
// } | |
// detect if "auto subtitle" and "closed subtitle" exist | |
// and add <option> into <select> | |
async function load_language_list(select) { | |
// auto | |
var auto_subtitle_exist = false | |
// closed | |
var closed_subtitle_exist = false | |
// get auto subtitle | |
var auto_subtitle_url = get_auto_subtitle_xml_url() | |
if (auto_subtitle_url != false) { | |
auto_subtitle_exist = true | |
} | |
var captionTracks = get_captionTracks() | |
if ( | |
captionTracks != undefined && | |
typeof captionTracks === 'object' && | |
captionTracks.length > 0 | |
) { | |
closed_subtitle_exist = true | |
} | |
// if no subtitle at all, just say no and stop | |
if (auto_subtitle_exist == false && closed_subtitle_exist == false) { | |
select.options[0].textContent = NO_SUBTITLE | |
disable_download_button() | |
return false | |
} | |
// if at least one type of subtitle exist | |
select.options[0].textContent = HAVE_SUBTITLE | |
select.disabled = false | |
var option = null // for <option> | |
var caption_info = null // for our custom object | |
// if auto subtitle exist | |
if (auto_subtitle_exist) { | |
caption_info = { | |
lang_code: 'AUTO', // later we use this to know if it's auto subtitle | |
lang_name: get_auto_subtitle_name(), // for display only | |
} | |
caption_array.push(caption_info) | |
option = document.createElement('option') | |
option.textContent = caption_info.lang_name | |
select.appendChild(option) | |
} | |
// if closed_subtitle_exist | |
if (closed_subtitle_exist) { | |
for (var i = 0, il = captionTracks.length; i < il; i++) { | |
var caption = captionTracks[i] | |
if (caption.kind == 'asr') { | |
continue | |
} | |
let lang_code = caption.languageCode | |
let lang_translated = caption.name.simpleText | |
let lang_name = lang_code_to_local_name(lang_code, lang_translated) | |
caption_info = { | |
lang_code: lang_code, | |
lang_name: lang_name, | |
} | |
caption_array.push(caption_info) | |
// 加到 caption_array 里, 一个全局变量, 待会要靠它来下载 | |
option = document.createElement('option') | |
option.textContent = caption_info.lang_name | |
if (lang_name.toLowerCase() == "vietnamese") { | |
option.selected = true; | |
} | |
select.appendChild(option) | |
} | |
} | |
} | |
function disable_download_button() { | |
$(HASH_BUTTON_ID) | |
.css('border', '#95a5a6') | |
.css('cursor', 'not-allowed') | |
.css('background-color', '#95a5a6') | |
$('#captions_selector') | |
.css('border', '#95a5a6') | |
.css('cursor', 'not-allowed') | |
.css('background-color', '#95a5a6') | |
if (new_material_design_version()) { | |
$(HASH_BUTTON_ID).css('padding', '6px') | |
} else { | |
$(HASH_BUTTON_ID).css('padding', '5px') | |
} | |
} | |
// 处理时间. 比如 start="671.33" start="37.64" start="12" start="23.029" | |
// 处理成 srt 时间, 比如 00:00:00,090 00:00:08,460 00:10:29,350 | |
function process_time(s) { | |
s = s.toFixed(3) | |
// 超棒的函数, 不论是整数还是小数都给弄成3位小数形式 | |
// 举个柚子: | |
// 671.33 -> 671.330 | |
// 671 -> 671.000 | |
// 注意函数会四舍五入. 具体读文档 | |
var array = s.split('.') | |
// 把开始时间根据句号分割 | |
// 671.330 会分割成数组: [671, 330] | |
var Hour = 0 | |
var Minute = 0 | |
var Second = array[0] // 671 | |
var MilliSecond = array[1] // 330 | |
// 先声明下变量, 待会把这几个拼好就行了 | |
// 我们来处理秒数. 把"分钟"和"小时"除出来 | |
if (Second >= 60) { | |
Minute = Math.floor(Second / 60) | |
Second = Second - Minute * 60 | |
// 把 秒 拆成 分钟和秒, 比如121秒, 拆成2分钟1秒 | |
Hour = Math.floor(Minute / 60) | |
Minute = Minute - Hour * 60 | |
// 把 分钟 拆成 小时和分钟, 比如700分钟, 拆成11小时40分钟 | |
} | |
// 分钟,如果位数不够两位就变成两位,下面两个if语句的作用也是一样。 | |
if (Minute < 10) { | |
Minute = '0' + Minute | |
} | |
// 小时 | |
if (Hour < 10) { | |
Hour = '0' + Hour | |
} | |
// 秒 | |
if (Second < 10) { | |
Second = '0' + Second | |
} | |
return Hour + ':' + Minute + ':' + Second + ',' + MilliSecond | |
} | |
// copy from: https://gist.github.com/danallison/3ec9d5314788b337b682 | |
// Thanks! https://github.com/danallison | |
// work in Chrome 66 | |
// test passed: 2018-5-19 | |
function downloadString(text, fileType, fileName) { | |
var blob = new Blob([text], { | |
type: fileType, | |
}) | |
var a = document.createElement('a') | |
a.download = fileName | |
a.href = URL.createObjectURL(blob) | |
a.dataset.downloadurl = [fileType, a.download, a.href].join(':') | |
a.style.display = 'none' | |
document.body.appendChild(a) | |
a.click() | |
document.body.removeChild(a) | |
setTimeout(function () { | |
URL.revokeObjectURL(a.href) | |
}, 1500) | |
} | |
// https://css-tricks.com/snippets/javascript/unescape-html-in-js/ | |
// turn HTML entity back to text, example: " should be " | |
function htmlDecode(input) { | |
var e = document.createElement('div') | |
e.class = | |
'dummy-element-for-tampermonkey-Youtube-Subtitle-Downloader-script-to-decode-html-entity' | |
e.innerHTML = input | |
return e.childNodes.length === 0 ? '' : e.childNodes[0].nodeValue | |
} | |
// return URL or null; | |
// later we can send a AJAX and get XML subtitle | |
function get_auto_subtitle_xml_url() { | |
try { | |
var captionTracks = get_captionTracks() | |
for (var index in captionTracks) { | |
var caption = captionTracks[index] | |
if (caption.kind === 'asr') { | |
return captionTracks[index].baseUrl | |
} | |
// ASR – A caption track generated using automatic speech recognition. | |
// https://developers.google.com/youtube/v3/docs/captions | |
} | |
return false | |
} catch (error) { | |
return false | |
} | |
} | |
async function get_auto_subtitle() { | |
var url = get_auto_subtitle_xml_url() | |
if (url == false) { | |
return false | |
} | |
var result = await get(url) | |
return result | |
} | |
async function get_closed_subtitle(lang_code) { | |
try { | |
var captionTracks = get_captionTracks() | |
for (var i in captionTracks) { | |
var caption = captionTracks[i] | |
if (caption.languageCode === lang_code && caption.kind != 'asr') { | |
// 必须写 caption.kind != 'asr' | |
// 否则会下载2个字幕文件(也就是这个分支会进来2次) | |
// 因为 lang_code 是 "en" 会 match 2条纪录,一条是自动字幕,一条是完整字幕 | |
// "自动字幕"那条是 kind=asr | |
// "完整字幕"那条没有 kind 属性 | |
let url = captionTracks[i].baseUrl | |
let result = await get(url) | |
return result | |
} | |
} | |
return false | |
} catch (error) { | |
return false | |
} | |
} | |
// Youtube return XML. we want SRT | |
// input: Youtube XML format | |
// output: SRT format | |
function parse_youtube_XML_to_SRT(youtube_xml_string) { | |
if (youtube_xml_string === '') { | |
return false | |
} | |
var text = youtube_xml_string.getElementsByTagName('text') | |
var result = '' | |
var BOM = '\uFEFF' | |
result = BOM + result // store final SRT result | |
var len = text.length | |
for (var i = 0; i < len; i++) { | |
var index = i + 1 | |
var content = text[i].textContent.toString() | |
content = content.replace(/(<([^>]+)>)/gi, '') // remove all html tag. | |
var start = text[i].getAttribute('start') | |
var end = | |
parseFloat(text[i].getAttribute('start')) + | |
parseFloat(text[i].getAttribute('dur')) | |
// 保留这段代码 | |
// 如果希望字幕的结束时间和下一行的开始时间相同(连在一起) | |
// 可以取消下面的注释 | |
// if (i + 1 >= len) { | |
// end = parseFloat(text[i].getAttribute('start')) + parseFloat(text[i].getAttribute('dur')); | |
// } else { | |
// end = text[i + 1].getAttribute('start'); | |
// } | |
// we want SRT format: | |
/* | |
1 | |
00:00:01,939 --> 00:00:04,350 | |
everybody Craig Adams here I'm a | |
2 | |
00:00:04,350 --> 00:00:06,720 | |
filmmaker on YouTube who's digging | |
*/ | |
var new_line = '\n' | |
result = result + index + new_line | |
// 1 | |
var start_time = process_time(parseFloat(start)) | |
var end_time = process_time(parseFloat(end)) | |
result = result + start_time | |
result = result + ' --> ' | |
result = result + end_time + new_line | |
// 00:00:01,939 --> 00:00:04,350 | |
content = htmlDecode(content) | |
// turn HTML entity back to text. example: ' back to apostrophe (') | |
result = result + content + new_line + new_line | |
// everybody Craig Adams here I'm a | |
} | |
return result | |
} | |
// return "English (auto-generated)" or a default name; | |
function get_auto_subtitle_name() { | |
try { | |
var captionTracks = get_captionTracks() | |
for (var index in captionTracks) { | |
var caption = captionTracks[index] | |
if (typeof caption.kind === 'string' && caption.kind == 'asr') { | |
return captionTracks[index].name.simpleText | |
} | |
} | |
return 'Auto Subtitle' | |
} catch (error) { | |
return 'Auto Subtitle' | |
} | |
} | |
function get_youtube_data() { | |
return document.getElementsByTagName('ytd-app')[0].data.playerResponse | |
} | |
function get_captionTracks() { | |
let data = get_youtube_data() | |
var captionTracks = | |
data?.captions?.playerCaptionsTracklistRenderer?.captionTracks | |
return captionTracks | |
} | |
// Input a language code, output that language name in current locale | |
// 如果当前语言是中文简体, Input: "de" Output: 德语 | |
// if current locale is English(US), Input: "de" Output: "Germany" | |
function lang_code_to_local_name(languageCode, fallback_name) { | |
try { | |
var captionTracks = get_captionTracks() | |
for (var i in captionTracks) { | |
var caption = captionTracks[i] | |
if (caption.languageCode === languageCode) { | |
let simpleText = captionTracks[i].name.simpleText | |
if (simpleText) { | |
return simpleText | |
} else { | |
return fallback_name | |
} | |
} | |
} | |
} catch (error) { | |
return fallback_name | |
} | |
} | |
// 获取视频标题 | |
function get_title() { | |
// 方法1:先尝试拿到标题 | |
var title_element = document.querySelector( | |
'h1.title.style-scope.ytd-video-primary-info-renderer' | |
) | |
if (title_element != null) { | |
var title = title_element.innerText | |
// 能拿到就返回 | |
if (title != undefined && title != null && title != '') { | |
return title | |
} | |
} | |
// 方法2:如果方法1失效用这个 | |
return ytplayer.bootstrapPlayerResponse.videoDetails.videoId // 这个会 delay, 如果页面跳转了,这个获得的标题还是旧的 | |
} | |
function get_video_id() { | |
return ytplayer.bootstrapPlayerResponse.videoDetails.videoId | |
} | |
// Usage: var result = await get(url) | |
function get(url) { | |
return $.ajax({ | |
url: url, | |
type: 'get', | |
success: function (r) { | |
return r | |
}, | |
fail: function (error) { | |
return error | |
}, | |
}) | |
} | |
const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms)) | |
// 等待一个元素存在 | |
// https://stackoverflow.com/questions/5525071/how-to-wait-until-an-element-exists | |
function waitForElm(selector) { | |
return new Promise((resolve) => { | |
if (document.querySelector(selector)) { | |
return resolve(document.querySelector(selector)) | |
} | |
const observer = new MutationObserver((mutations) => { | |
if (document.querySelector(selector)) { | |
resolve(document.querySelector(selector)) | |
observer.disconnect() | |
} | |
}) | |
observer.observe(document.body, { | |
childList: true, | |
subtree: true, | |
}) | |
}) | |
} | |
})() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment