Skip to content

Instantly share code, notes, and snippets.

@tomeko
Last active January 7, 2024 21:51
Show Gist options
  • Save tomeko/df12139071acf2d163d96ca2d2272027 to your computer and use it in GitHub Desktop.
Save tomeko/df12139071acf2d163d96ca2d2272027 to your computer and use it in GitHub Desktop.
Download YouTube movie trailers for a single movie file or in batch (for a directory of movies). Single-file mode use case: run it as a trigger from Jellyfin when content is added.
# trailer_dl.py
#
# Required packages: yt_dlp, youtubesearchpython
# Python: 3.8.10
#
# arguments:
# python trailer_dl.py <movie_file_or_directory>
# If a directory is given, it will find all mp4 files one directory level deep. For example /my/media/movies (which contains /my/media/movies/movie1/movie1 (2021).mp4, /my/media/movies/movie2/movie2 (2020).mp4, etc.)
# If a file is given (.mp4), it will only get the trailer for that movie
# Either way, it makes an extras folder (works for jellyfin) in that particular movie's directory and downloads the trailer there
# python trailer_dl.py dryrun <movie_file_or_directory>
# Runs a dryrun without any changes/downloads, records results to dryrun.txt
# python trailer_dl.py $(pwd)
# Runs in current directory
#
# other:
# If the extras folder already exists in a movie's directory, that movie is skipped and no trailer is downloaded.
# Search results work best with movie file format <title> (year)*.mp4. Example: "Name of the movie (2022) AAC.whateverelse.mp4"
# This was whipped together pretty quickly, no exception handling, etc. Use at your own risk.
# todos: force redownload if exists, trailer file extension/format choice
import sys, os, json, shutil, re, glob, yt_dlp, youtubesearchpython
max_trailer_sz_mb = 200
# format selector for yt_dlp, best mp4 video
def format_selector(ctx):
    """Select the best mp4 video-only stream plus a compatible audio-only stream.

    Written to be passed as the 'format' option of yt_dlp.YoutubeDL.

    Args:
        ctx: Mapping whose 'formats' entry is a list of format dicts ordered
            worst to best. Each format dict is read for its 'vcodec',
            'acodec', 'ext', 'format_id' and 'protocol' keys.

    Yields:
        One dict describing the merged video+audio format. Nothing is yielded
        when there is no mp4 video-only stream or no matching audio-only
        stream, so the caller sees "no format selected" instead of a
        RuntimeError (a bare next() raising StopIteration inside a generator
        is converted to RuntimeError by Python).
    """
    # formats are already sorted worst to best, so walk them best-first
    formats = (ctx.get('formats') or [])[::-1]

    # acodec='none' means there is no audio
    best_video = next(
        (f for f in formats
         if f.get('vcodec') != 'none'
         and f.get('acodec') == 'none'
         and f.get('ext') == 'mp4'),
        None,
    )
    if best_video is None:
        return

    # find compatible audio extension
    audio_ext = {'mp4': 'm4a', 'webm': 'webm'}.get(best_video['ext'])

    # vcodec='none' means there is no video
    best_audio = next(
        (f for f in formats
         if f.get('acodec') != 'none'
         and f.get('vcodec') == 'none'
         and f.get('ext') == audio_ext),
        None,
    )
    if best_audio is None:
        return

    # These are the minimum required fields for a merged format
    yield {
        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
        'ext': best_video['ext'],
        'requested_formats': [best_video, best_audio],
        # Must be + separated list of protocols
        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}',
    }
# Matches "<title> (<year>)<anything>", e.g. "Name of the movie (2022) AAC".
# The century alternation is grouped so that 19xx years match as well as 20xx
# (an ungrouped "19|20\d{2}" only ever matches "19" or "20dd").
_PAREN_YEAR_RE = re.compile(r'(.*)\(((?:19|20)\d{2})\)')

# Fallback for dot-separated names: "<Name>.<Year>.<Resolution>.<Format>".
_DOTTED_NAME_RE = re.compile(
    r'^(?P<Name>.+?)(?!\.[12]\d\d\d\.\d{,3}[ip]\.)\.(?P<Year>\d\d\d\d)'
    r'\.(?P<Resolution>[^.]+)\.(?P<Format>[^.]+)')


def _parse_args(argv):
    """Return (dryrun, target) parsed from an argv-style list; exit if no target is given."""
    if len(argv) == 3 and argv[1] == 'dryrun':
        print(f"dryrun started for {argv[2]}")
        return True, argv[2]
    if len(argv) < 2:
        print("input directory or file to scan")
        sys.exit()
    return False, argv[1]


def _collect_movies(target):
    """Return (movie_files, work_path) for a movie file or a directory of movie folders.

    work_path is '' when a single file was given. Exits with status 1 when
    target is neither an existing file nor a directory.
    """
    if os.path.isfile(target):
        return [target], ''
    if os.path.isdir(target):
        print(f"Searching movies in path: {target}")
        # Without recursive=True, '**' behaves like '*': exactly one level deep.
        pattern = os.path.join(target, '**/*.mp4')
        return glob.glob(pattern, recursive=False), target
    print(f"Error: couldn't parse {target}")
    sys.exit(1)


def _parse_title_year(fname):
    """Extract (title, year) from a file name without extension.

    Falls back to (fname, '') when no year can be recognised; searching by the
    raw file name gives worse trailer matches but still works.
    """
    paren = _PAREN_YEAR_RE.match(fname)
    if paren and paren.group(1).strip():
        # strip the space that separates the title from "(year)"
        return paren.group(1).strip(), paren.group(2)
    dotted = _DOTTED_NAME_RE.search(fname)
    if dotted:
        return dotted.group('Name').replace('.', ' '), dotted.group('Year')
    return fname, ''


def _find_trailer_link(query):
    """Return the link of the first search hit for query, or '' when there is none."""
    try:
        resroot = youtubesearchpython.VideosSearch(query, limit=1).result()
    except Exception as exc:
        # A failed search must not abort a whole batch run.
        print(f"Search failed for '{query}': {exc}")
        return ''
    results = (resroot or {}).get('result') or []
    return (results[0].get('link') or '') if results else ''


def _download_trailer(link, extras_dir, fout):
    """Create extras_dir and download link to fout; return True on success.

    extras_dir is removed again when the download fails or leaves nothing
    behind, so that a later run does not skip the movie because of an empty
    extras folder.
    """
    # 'format' may alternatively be set to the format_selector callable
    # defined in this file.
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]',
        'default_search': 'auto',
        'restrictfilenames': True,
        'prefer_ffmpeg': True,
        'quiet': True,
        'no_warnings': True,
        'ignoreerrors': True,
        'noplaylist': True,
        'noprogress': True,
        # outtmpl is a template: a literal '%' in the path must be doubled
        'outtmpl': fout.replace('%', '%%'),
        'max_filesize': max_trailer_sz_mb * 1024 * 1024,
    }
    created = False
    ok = False
    try:
        os.mkdir(extras_dir)
        created = True
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # With 'ignoreerrors' set, failures are reported through the
            # return code instead of an exception.
            retcode = ydl.download([link])
        ok = not retcode and bool(os.listdir(extras_dir))
    except Exception as exc:
        print(f"Download failed: {exc}")
    finally:
        # Only remove a directory this call created (also on Ctrl-C).
        if created and not ok:
            shutil.rmtree(extras_dir, ignore_errors=True)
    return ok


def _process_movie(movie_file, dryrun_log):
    """Find and download the trailer for one movie file.

    dryrun_log is an open text file in dryrun mode (results are recorded
    there and nothing is downloaded) or None for a real run.
    """
    fpath, base = os.path.split(os.path.abspath(movie_file))
    fname = os.path.splitext(base)[0]  # filename without extension
    extras_dir = os.path.join(fpath, "extras")

    # an existing extras dir means this movie was already handled
    if os.path.isdir(extras_dir):
        return

    title, year = _parse_title_year(fname)
    if title and year:
        print(f"title: {title}, year: {year}")

    query = f'{title} {year} trailer' if year else f'{title} trailer'
    link = _find_trailer_link(query)
    if not link:
        msg = f"Couldn't find a trailer for {movie_file}"
        print(msg)
        if dryrun_log is not None:
            dryrun_log.write(msg + '\n')
        return

    if dryrun_log is not None:
        dryrun_log.write(f'file: {fname} query: {query} link: {link}\n')
        print(f"dryrun: {query} {link}")
        return

    # without a recognised year, keep the original file name
    final_name = f'{title} ({year})' if year else fname
    fout = os.path.join(extras_dir, final_name + '-trailer.mp4')

    print(f"Downloading trailer for {movie_file}")
    if not _download_trailer(link, extras_dir, fout):
        print(f"Error downloading trailer for {final_name}")


def main(argv=None):
    """Script entry point.

    Args:
        argv: argv-style list (program name first); defaults to sys.argv.
            Either [prog, <movie_file_or_directory>] or
            [prog, 'dryrun', <movie_file_or_directory>].

    In dryrun mode the results are written to dryrun.txt inside the given
    directory (or the current directory when a single file was given).
    """
    argv = sys.argv if argv is None else argv
    dryrun, target = _parse_args(argv)
    movie_files, work_path = _collect_movies(target)

    dryrun_log = open(os.path.join(work_path, 'dryrun.txt'), 'w') if dryrun else None
    try:
        for movie_file in movie_files:
            _process_movie(movie_file, dryrun_log)
    finally:
        # close the log even if a movie fails unexpectedly
        if dryrun_log is not None:
            dryrun_log.close()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment