Last active
September 19, 2021 03:22
-
-
Save CTimmerman/3ede451ca2b30ce551a6a977e2a7c0ae to your computer and use it in GitHub Desktop.
YouTube Stream Downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""YouTube Stream Downloader | |
2021-08-16 v1.0 by Cees Timmerman | |
2021-09-18 v1.1 Better error handling. Revert to XML captions as conversion to SRT is broken in pytube 11.0.1 for https://www.youtube.com/watch?v=AOZw1tgD8dA | |
""" | |
import logging, os, re | |
import pytube | |
from pytube.cli import on_progress | |
# The logging module is used for all error output because it was the least
# gnarly way to print the source of any error or exception; the traceback
# module only handled exceptions.
# The level is read from the YT_DL_LOGLEVEL environment variable (default INFO).
logging.basicConfig(
    level=os.environ.get("YT_DL_LOGLEVEL", "INFO"),
    format='%(levelname)s:%(message)s',
)
log = logging.getLogger()
def fix_filename(name, max_length=255):
    """
    Return name cut to max_length characters with unsafe parts replaced by "_".

    Each of the following is replaced by a single underscore:
    - a character that is invalid on Linux/Windows/MacOS: slash, backslash,
      colon, pipe, angle brackets, double quote, question mark, asterisk or a
      control character 0-31 (list from https://stackoverflow.com/a/31976060/819417);
    - a reserved device name (AUX, COM1-9, CON, LPT1-9, NUL, PRN, any case) at
      the start, when it is the whole name or is directly followed by a period;
    - a whitespace character in first position;
    - a whitespace character or period in last position (trailing spaces and
      periods are ignored on Windows).

    >>> fix_filename(" COM1 ")
    '_COM1_'
    >>> fix_filename("COM10")
    'COM10'
    >>> fix_filename("COM1,")
    'COM1,'
    >>> fix_filename("COM1.txt")
    '_.txt'
    >>> all('_' == fix_filename(chr(i)) for i in range(32))
    True
    """
    unsafe = re.compile(
        r'[/\\:|<>"?*\0-\x1f]|^(AUX|COM[1-9]|CON|LPT[1-9]|NUL|PRN)(?![^.])|^\s|[\s.]$',
        re.IGNORECASE,
    )
    # Truncate first so the edge rules apply to the characters that end up first and last.
    truncated = name[:max_length]
    return unsafe.sub("_", truncated)
# Includes . return re.sub(r'[/\\:|<>"?*\0-\31]|^(AUX|COM[1-9]|CON|LPT[1-9]|NUL|PRN)(\.|$)|^\s|[\s.]$', "_", name[:max_length], flags=re.IGNORECASE) | |
def download_youtube_stream(url, folder='output-folder'):
    """
    Interactively download one stream of a YouTube video, plus English captions if present.

    Prints the video title and a numbered table of the streams that include
    both an audio and a video track, asks on stdin which stream number to
    fetch, and downloads that stream into folder (skip_existing=True is passed
    to the download). If the video has an 'en' caption track, it is downloaded
    into the same folder with srt=False, under a name derived from the video title.

    url: address of the video, handed to pytube.YouTube.
    folder: output directory for the video and caption files.

    Raises ValueError when the answer is not an integer and IndexError when it
    is not a valid stream number. Exceptions raised by pytube are not caught here.
    """
    youtube = pytube.YouTube(url, on_progress_callback=on_progress)
    print(youtube.title)
    # Public attributes seen on the YouTube object (dir() dump kept for reference):
    # age_restricted, allow_oauth_cache, author, bypass_age_gate, caption_tracks,
    # captions, channel_id, channel_url, check_availability, description,
    # embed_html, embed_url, fmt_streams, initial_data, js, js_url, keywords,
    # length, metadata, publish_date, rating, register_on_complete_callback,
    # register_on_progress_callback, stream_monostate, streaming_data, streams,
    # thumbnail_url, title, use_oauth, vid_info, video_id, views, watch_html,
    # watch_url
    streams = youtube.streams
    # Public attributes seen on each stream (dir() dump kept for reference):
    # abr, audio_codec, bitrate, codecs, default_filename, download,
    # exists_at_path, expiration, filesize, filesize_approx, get_file_path,
    # includes_audio_track, includes_video_track, is_3d, is_adaptive, is_dash,
    # is_hdr, is_live, is_otf, is_progressive, itag, mime_type, on_complete,
    # on_progress, parse_codecs, resolution, stream_to_buffer, subtype, title,
    # type, url, video_codec

    # Only streams carrying both audio and video are listed, but the numbers
    # shown are positions in the full stream list, so they index it directly.
    for i, s in enumerate(streams):
        if s.includes_audio_track and s.includes_video_track:
            print(f"{i + 1:>2}: {s.resolution:>5} {s.bitrate / 1000:>4,.0f}kbps {s.abr:>7} {s.mime_type:<10} {str(s.codecs):<28} {s.filesize / 1000_000:>7,.2f} MB {s.filesize / 1024 / 1024:>7,.2f} MiB")

    choice = int(input("Which stream? ")) - 1
    # Without this check, an answer of 0 or less becomes a negative index and
    # silently selects a stream counted from the end of the list.
    if choice < 0:
        raise IndexError("Stream number must be 1 or higher.")
    stream = streams[choice]

    print("Downloading...")
    stream.download(folder, skip_existing=True)

    if 'en' in youtube.captions:
        caption = youtube.captions['en']
        print(caption)
        # xml->srt broken in pytube 11.0.1 on https://www.youtube.com/watch?v=AOZw1tgD8dA
        caption.download(title=fix_filename(youtube.title), output_path=folder, srt=False)

    # Report the stream that was actually chosen (not the loop variable left
    # over from the listing) and build the path with the platform's separator.
    print(f"\nDone. {os.path.join(folder, stream.default_filename)}")
if __name__ == "__main__":
    try:
        # Under Python 2, input() would be dangerous (raw_input is the call to
        # use there), but Python 2 is unsupported.
        video_url = input("YouTube URL: ")
        download_youtube_stream(video_url)
    except Exception as error:
        # Log the failure instead of letting the console window close on it.
        log.exception(error)

    input()  # Keep console open until user presses enter.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment