Last active
January 7, 2024 21:51
-
-
Save tomeko/df12139071acf2d163d96ca2d2272027 to your computer and use it in GitHub Desktop.
Download YouTube movie trailers for a single file or in batch (for a directory of movies). Single-file mode use case: run a trigger from Jellyfin when content is added.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# trailer_dl.py
#
# Required packages: yt_dlp, youtubesearchpython
# Python: 3.8.10
#
# arguments:
# python trailer_dl.py <movie_file_or_directory>
#   If a directory is given, it will find all mp4s exactly one directory level deep. For example /my/media/movies (which contains /my/media/movies/movie1/movie1 (2021).mp4, /my/media/movies/movie2/movie2 (2020).mp4, etc.)
#   If a file is given (.mp4), it will only get the trailer for that movie.
#   Either way, it makes an extras folder (works for jellyfin) in that particular movie's directory and downloads the trailer there.
# python trailer_dl.py dryrun <movie_file_or_directory>
#   Runs a dry run without any downloads; records results to dryrun.txt (written to the given directory, or to the current directory when a single file is given).
# python trailer_dl.py $(pwd)
#   Runs in the current directory.
#
# other:
#   If the extras folder already exists, the movie is skipped and no trailer is downloaded.
#   Search results work best with the movie file format <title> (year)*.mp4. Example: "Name of the movie (2022) AAC.whateverelse.mp4"
#   This was whipped together pretty quickly, no exception handling, etc. Use at your own risk.
# todos: force redownload if exists, trailer file extension/format choice
import sys, os, json, shutil, re, glob, yt_dlp, youtubesearchpython | |
max_trailer_sz_mb = 200 | |
# format selector for yt_dlp, best mp4 video | |
def format_selector(ctx):
    """Custom yt_dlp format selector: best mp4 video merged with matching audio.

    Args:
        ctx: mapping handed over by yt_dlp; only its 'formats' list is read.
            Each entry is a dict with at least 'format_id', 'ext', 'vcodec',
            'acodec' and 'protocol'.

    Yields:
        A single dict describing the merged video+audio format. Nothing is
        yielded when no mp4 video-only stream or no compatible audio-only
        stream is available, so the caller sees "no matching format" instead
        of a RuntimeError (a bare next() raising StopIteration inside a
        generator is converted to RuntimeError since Python 3.7).
    """
    # formats are already sorted worst to best, so scan them in reverse;
    # a missing/None 'formats' entry is treated as "no formats"
    formats = (ctx.get('formats') or [])[::-1]

    # acodec='none' means there is no audio
    best_video = next(
        (f for f in formats
         if f.get('vcodec') != 'none'
         and f.get('acodec') == 'none'
         and f.get('ext') == 'mp4'),
        None)
    if best_video is None:
        return

    # find compatible audio extension
    audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]

    # vcodec='none' means there is no video
    best_audio = next(
        (f for f in formats
         if f.get('acodec') != 'none'
         and f.get('vcodec') == 'none'
         and f.get('ext') == audio_ext),
        None)
    if best_audio is None:
        return

    # These are the minimum required fields for a merged format
    yield {
        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
        'ext': best_video['ext'],
        'requested_formats': [best_video, best_audio],
        # Must be + separated list of protocols
        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}',
    }
# "<title> (<year>)<anything>", year restricted to 19xx / 20xx.
# The (?:19|20) group keeps the alternation from swallowing the \d{2} part.
_PAREN_YEAR_RE = re.compile(r'(.*)\(((?:19|20)\d{2})\)')
# Fallback for dot-separated names such as "Some.Movie.2019.1080p.BluRay".
_DOTTED_YEAR_RE = re.compile(
    r'^(?P<Name>.+?)(?!\.[12]\d\d\d\.\d{,3}[ip]\.)\.(?P<Year>\d\d\d\d)\.(?P<Resolution>[^.]+)\.(?P<Format>[^.]+)')


def parse_title_year(fname):
    """Extract (title, year) from a movie file name without extension.

    Tries "<title> (<year>)..." first, then the dot-separated pattern.
    When neither matches, the whole name is returned as the title and the
    year is '' (this gives worse search results / trailer matches).
    """
    match = _PAREN_YEAR_RE.match(fname)
    if match is not None:
        # The greedy capture keeps the blank before "(year)"; drop it.
        title = match.group(1).strip()
        if title:
            return title, match.group(2)

    match = _DOTTED_YEAR_RE.search(fname)
    if match is not None:
        return match.group('Name').replace('.', ' '), match.group('Year')

    return fname, ''


def find_trailer_link(query):
    """Return the link of the first youtubesearchpython hit for query, or ''."""
    try:
        results = youtubesearchpython.VideosSearch(query, limit=1).result()
    except Exception as exc:
        # One failed search must not abort a whole batch run.
        print(f"Search failed for '{query}': {exc}")
        return ''
    hits = results.get('result') if results else None
    if not hits:
        return ''
    return hits[0].get('link') or ''


def download_trailer(link, extras_dir, fout):
    """Download link to fout inside extras_dir using yt_dlp.

    Returns True on success. On any failure the extras directory is removed
    again, but only if this call created it; otherwise an empty directory
    would make every later run skip the movie.
    """
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]',
        'default_search': 'auto',
        'restrictfilenames': True,
        'prefer_ffmpeg': True,
        'quiet': True,
        'no_warnings': True,
        'ignoreerrors': True,
        'noplaylist': True,
        'noprogress': True,
        # outtmpl is a %-style template: a literal '%' in the path must be doubled
        'outtmpl': fout.replace('%', '%%'),
        'max_filesize': max_trailer_sz_mb * 1024 * 1024,
    }
    created = False
    try:
        if not os.path.isdir(extras_dir):
            os.mkdir(extras_dir)
            created = True
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            retcode = ydl.download([link])
        # With 'ignoreerrors' set, failures are reported through the return
        # code rather than an exception; an oversized or skipped download can
        # also leave the directory empty.
        ok = not retcode and bool(os.listdir(extras_dir))
    except Exception as exc:
        print(f"Download failed: {exc}")
        ok = False
    if not ok and created:
        shutil.rmtree(extras_dir, ignore_errors=True)
    return ok


def process_movies(movie_files, dryrun_log=None):
    """Find and download a trailer for every movie file in movie_files.

    Movies whose directory already contains an "extras" folder are skipped.
    When dryrun_log (an open text file) is given, nothing is downloaded and
    the query/link of each movie is written to it instead.
    """
    for movie_file in movie_files:
        fpath, base_name = os.path.split(os.path.abspath(movie_file))
        fname = os.path.splitext(base_name)[0]  # filename without extension
        extras_dir = os.path.join(fpath, "extras")

        # check the extras dir
        if os.path.isdir(extras_dir):
            continue

        title, year = parse_title_year(fname)
        if year:
            print(f"title: {title}, year: {year}")

        # find link using youtubesearchpython
        query = f'{title} {year} trailer' if year else f'{title} trailer'
        link = find_trailer_link(query)
        if not link:
            msg = f"Couldn't find a trailer for {movie_file}"
            print(msg)
            if dryrun_log is not None:
                dryrun_log.write(msg + '\n')
            continue

        if dryrun_log is not None:
            dryrun_log.write(f'file: {fname} query: {query} link: {link}\n')
            print(f"dryrun: {query} {link}")
            continue

        # file out name; fall back to the raw file name when no year was found
        final_name = f'{title} ({year})' if year else fname
        fout = os.path.join(extras_dir, final_name + '-trailer.mp4')

        print(f"Downloading trailer for {movie_file}")
        if not download_trailer(link, extras_dir, fout):
            print(f"Error downloading trailer for {final_name}")


def main(argv=None):
    """Command line entry point.

    Usage:
        trailer_dl.py <movie_file_or_directory>
        trailer_dl.py dryrun <movie_file_or_directory>

    argv defaults to sys.argv. Exits with status 1 when the given path is
    neither an existing file nor a directory.
    """
    argv = sys.argv if argv is None else argv

    # check arguments, inparg will be file or directory
    dryrun = False
    if len(argv) == 3 and argv[1] == 'dryrun':
        dryrun = True
        inparg = argv[2]
        print(f"dryrun started for {inparg}")
    elif len(argv) == 1:
        print("input directory or file to scan")
        sys.exit()
    else:
        inparg = argv[1]

    # check if inparg is file or directory, handle
    work_path = ''
    if os.path.isfile(inparg):
        movie_files = [inparg]
    elif os.path.isdir(inparg):
        work_path = inparg
        print(f"Searching movies in path: {work_path}")
        # exactly one directory level deep: <work_path>/<movie dir>/<file>.mp4
        movie_files = glob.glob(os.path.join(work_path, '*', '*.mp4'))
    else:
        if not os.path.exists(inparg):
            print(f"Error: file {inparg} doesn't exist")
        else:
            print(f"Error: couldn't parse {inparg}")
        sys.exit(1)

    if dryrun:
        # work_path is '' in single-file mode, so the log lands in the cwd
        log_path = os.path.join(work_path, 'dryrun.txt')
        with open(log_path, 'w', encoding='utf-8') as dryrun_log:
            process_movies(movie_files, dryrun_log)
    else:
        process_movies(movie_files)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment