tomeko · January 7, 2024 21:51
diff --git a/trailer_dl.py b/trailer_dl.py
 # trailer_dl.py
 #
 # Required packages: yt_dlp, youtubesearchpython
 # Python: 3.8.10
 #
 # arguments:
 # python trailer_dl.py <movie_file_or_directory>
 #  	If a directory is given, it will find all mp4 files exactly one directory level deep. For example /my/media/movies (which contains /my/media/movies/movie1/movie1 (2021).mp4, /my/media/movies/movie2/movie2 (2020).mp4, etc.)
 #   If a file is given (.mp4), it will only get the trailer for that movie
 #		Either way, it makes an extras folder (works for jellyfin) in that particular movies directory and downloads the trailer there
 #  python trailer_dl.py dryrun <movie_file_or_directory>
 #	Runs a dryrun without any changes/downloads, records results to dryrun.txt
 #  python trailer_dl.py $(pwd)
 #	Runs in current directory
 #
 # other:
 #  If the extras folder already exists for a movie, that movie is skipped (the folder's contents are not inspected).
 #  Search results work best with movie file format <title> (year)*.mp4. Example: "Name of the movie (2022) AAC.whateverelse.mp4"
 #  This was whipped together pretty quickly, no exception handling, etc. Use at your own risk.
 #  todos: force redownload if exists, trailer file extension/format choice

 import sys, os, json, shutil, re, glob, yt_dlp, youtubesearchpython

 # Size cap for a downloaded trailer, in MiB; it is multiplied out to bytes further down and handed to yt_dlp as 'max_filesize'.
 max_trailer_sz_mb = 200

 # format selector for yt_dlp, best mp4 video
 def format_selector(ctx):
 	"""Pick the best mp4 video-only stream plus a matching audio-only stream.

 	Custom format selector for yt_dlp (usable as the 'format' option).

 	ctx: selector context dict; only ctx['formats'] is read, and it is
 	expected to be sorted worst to best.

 	Yields at most one merged-format dict. Nothing is yielded when no
 	suitable video/audio pair exists: a bare next() would otherwise leak
 	StopIteration out of this generator, which Python reports as
 	"RuntimeError: generator raised StopIteration".
 	"""
 	# formats are already sorted worst to best, so search them in reverse
 	formats = (ctx.get('formats') or [])[::-1]

 	# acodec='none' means there is no audio
 	best_video = next((
 		f for f in formats
 		if f.get('vcodec', 'none') != 'none' and f.get('acodec') == 'none' and f.get('ext') == 'mp4'
 	), None)
 	if best_video is None:
 		return

 	# find compatible audio extension
 	audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
 	# vcodec='none' means there is no video
 	best_audio = next((
 		f for f in formats
 		if f.get('acodec', 'none') != 'none' and f.get('vcodec') == 'none' and f.get('ext') == audio_ext
 	), None)
 	if best_audio is None:
 		return

 	# These are the minimum required fields for a merged format
 	yield {
 		'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
 		'ext': best_video['ext'],
 		'requested_formats': [best_video, best_audio],
 		# Must be + separated list of protocols
 		'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
 	}

 # Handle to dryrun.txt; stays None unless a dry run was requested.
 dryrun_log = None

 # Movie files to process: the single file given, or the glob result for a directory.
 movie_files = []

 dryrun = False

 inparg = ''
 work_path = ''

 # yt_dlp's 'max_filesize' option is passed this value (MiB figure multiplied out to bytes)
 max_trailer_sz = max_trailer_sz_mb * 1024 * 1024

 # check arguments, inparg will be file or directory
 if len(sys.argv) == 3 and sys.argv[1] == 'dryrun':
 	dryrun = True
 	inparg = sys.argv[2]
 	print(f"dryrun started for {inparg}")
 elif len(sys.argv) == 1:
 	print("input directory or file to scan")
 	sys.exit()
 else:
 	inparg = sys.argv[1]

 # check if inparg is file or directory, handle
 if os.path.isfile(inparg):
 	# isfile() already implies the path exists, no second check needed
 	movie_files.append(inparg)
 elif os.path.isdir(inparg):
 	work_path = inparg
 else:
 	# Neither an existing file nor a directory: stop here instead of
 	# carrying on with nothing to process (and creating an empty dryrun.txt).
 	print(f"Error: couldn't parse {inparg}")
 	sys.exit(1)

 if dryrun:
 	# work_path is '' when a single file was given, so the log then lands in the current working directory
 	dryrun_log = open(os.path.join(work_path, 'dryrun.txt'), 'w', encoding='utf-8')

 # collect the movies when a directory was passed as arg
 if work_path:
 	print(f"Searching movies in path: {work_path}")
 	# Exactly one directory level deep. The root is escaped so that characters
 	# such as '[' in the given path are not interpreted as glob syntax.
 	movie_files = glob.glob(os.path.join(glob.escape(work_path), '*', '*.mp4'))

 # "<title> (year)" naming, e.g. "Name of the movie (2022) AAC.whateverelse".
 # The century alternation is grouped so that 19xx years match as well as 20xx.
 paren_year_re = re.compile(r'(.*)\(((?:19|20)\d{2})\)')
 # dotted release naming, e.g. "Name.Of.The.Movie.2022.1080p.WEB"
 dotted_year_re = re.compile(r'^(?P<Name>.+?)(?!\.[12]\d\d\d\.\d{,3}[ip]\.)\.(?P<Year>\d\d\d\d)\.(?P<Resolution>[^.]+)\.(?P<Format>[^.]+)')

 for movie_file in movie_files:

 	fpath, fbase = os.path.split(os.path.abspath(movie_file))	# file directory, file name
 	fname = os.path.splitext(fbase)[0]	# filename without extension

 	extras_dir = os.path.join(fpath, "extras")

 	# an existing extras dir means this movie is skipped; its contents are not inspected
 	if os.path.isdir(extras_dir):
 		continue

 	# see if we can extract the title and year from filename, otherwise just use the filename
 	# without extension (though this makes for worse search results/trailer matches)
 	title = fname
 	year = ''
 	match = paren_year_re.match(fname)
 	if match is not None:
 		# drop the blank that separates the title from "(year)"
 		title = match.group(1).strip()
 		year = match.group(2)
 	else:
 		# try another regex
 		match = dotted_year_re.search(fname)
 		if match is not None:
 			title = match.group('Name').replace('.', ' ')
 			year = match.group('Year')
 	if title and year:
 		print(f"title: {title}, year: {year}")

 	# find link using youtubesearchpython
 	query_txt = title
 	if year:
 		query_txt += f' {year}'
 	query = f'{query_txt} trailer'

 	link = ''
 	try:
 		resroot = youtubesearchpython.VideosSearch(query, limit = 1).result()
 	except Exception as exc:
 		# one failed lookup should not abort the remaining movies
 		print(f"Error searching for {query}: {exc}")
 		resroot = None
 	if resroot and resroot.get('result'):
 		link = resroot['result'][0].get('link') or ''

 	if not link:
 		msg = f"Couldn't find a trailer for {movie_file}"
 		print(msg)
 		if dryrun:
 			dryrun_log.write(msg + '\n')
 		continue

 	if dryrun:
 		# a dry run only records what would be downloaded
 		dryrun_log.write(f'file: {fname} query: {query} link: {link}\n')
 		print(f"dryrun: {query} {link}")
 		continue

 	# title/year are strings (possibly empty), never None, so test for emptiness
 	final_name = f'{title} ({year})' if title and year else fname

 	# file out name; yt_dlp reads 'outtmpl' as a %-style template, so literal '%' must be doubled
 	fout = os.path.join(extras_dir, final_name + '-trailer.mp4').replace('%', '%%')

 	ydl_opts = {
 		'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]',
 		'default_search': 'auto',
 		'restrictfilenames': True,
 		'prefer_ffmpeg': True,
 		'quiet': True,
 		'no_warnings': True,
 		'ignoreerrors': True,
 		'noplaylist': True,
 		'noprogress': True,
 		'outtmpl': fout,
 		'max_filesize': max_trailer_sz
 	}

 	print(f"Downloading trailer for {movie_file}")
 	error = ''
 	try:
 		os.makedirs(extras_dir, exist_ok=True)
 		with yt_dlp.YoutubeDL(ydl_opts) as ydl:
 			# with 'ignoreerrors' set, failures come back as a non-zero return code instead of an exception
 			if ydl.download([link]) != 0:
 				error = 'yt_dlp reported a download error'
 			elif not os.listdir(extras_dir):
 				# presumably skipped by yt_dlp (e.g. over max_filesize) - TODO confirm
 				error = 'no file was written'
 	except Exception as exc:
 		error = str(exc) or type(exc).__name__
 	if error:
 		print(f"Error downloading trailer for {final_name}: {error}")
 		# an extras dir left behind would make this movie be skipped on every later run
 		shutil.rmtree(extras_dir, ignore_errors=True)

 if dryrun:
 	dryrun_log.close()
	# trailer_dl.py
	#
	# Required packages: yt_dlp, youtubesearchpython
	# Python: 3.8.10
	#
	# arguments:
	# python trailer_dl.py <movie_file_or_directory>
	# If a directory is given, it will find all mp4 files exactly one directory level deep. For example /my/media/movies (which contains /my/media/movies/movie1/movie1 (2021).mp4, /my/media/movies/movie2/movie2 (2020).mp4, etc.)
	# If a file is given (.mp4), it will only get the trailer for that movie
	# Either way, it makes an extras folder (works for jellyfin) in that particular movies directory and downloads the trailer there
	# python trailer_dl.py dryrun <movie_file_or_directory>
	# Runs a dryrun without any changes/downloads, records results to dryrun.txt
	# python trailer_dl.py $(pwd)
	# Runs in current directory
	#
	# other:
	# If the extras folder already exists for a movie, that movie is skipped (the folder's contents are not inspected).
	# Search results work best with movie file format <title> (year)*.mp4. Example: "Name of the movie (2022) AAC.whateverelse.mp4"
	# This was whipped together pretty quickly, no exception handling, etc. Use at your own risk.
	# todos: force redownload if exists, trailer file extension/format choice

	import sys, os, json, shutil, re, glob, yt_dlp, youtubesearchpython

	# Size cap for a downloaded trailer, in MiB; it is multiplied out to bytes further down and handed to yt_dlp as 'max_filesize'.
	max_trailer_sz_mb = 200

	# format selector for yt_dlp, best mp4 video
	def format_selector(ctx):
		"""Pick the best mp4 video-only stream plus a matching audio-only stream.

		Custom format selector for yt_dlp (usable as the 'format' option).

		ctx: selector context dict; only ctx['formats'] is read, and it is
		expected to be sorted worst to best.

		Yields at most one merged-format dict. Nothing is yielded when no
		suitable video/audio pair exists: a bare next() would otherwise leak
		StopIteration out of this generator, which Python reports as
		"RuntimeError: generator raised StopIteration".
		"""
		# formats are already sorted worst to best, so search them in reverse
		formats = (ctx.get('formats') or [])[::-1]

		# acodec='none' means there is no audio
		best_video = next((
			f for f in formats
			if f.get('vcodec', 'none') != 'none' and f.get('acodec') == 'none' and f.get('ext') == 'mp4'
		), None)
		if best_video is None:
			return

		# find compatible audio extension
		audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
		# vcodec='none' means there is no video
		best_audio = next((
			f for f in formats
			if f.get('acodec', 'none') != 'none' and f.get('vcodec') == 'none' and f.get('ext') == audio_ext
		), None)
		if best_audio is None:
			return

		# These are the minimum required fields for a merged format
		yield {
			'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
			'ext': best_video['ext'],
			'requested_formats': [best_video, best_audio],
			# Must be + separated list of protocols
			'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
		}

	# Handle to dryrun.txt; stays None unless a dry run was requested.
	dryrun_log = None

	# Movie files to process: the single file given, or the glob result for a directory.
	movie_files = []

	dryrun = False

	inparg = ''
	work_path = ''

	# yt_dlp's 'max_filesize' option is passed this value (MiB figure multiplied out to bytes)
	max_trailer_sz = max_trailer_sz_mb * 1024 * 1024

	# check arguments, inparg will be file or directory
	if len(sys.argv) == 3 and sys.argv[1] == 'dryrun':
		dryrun = True
		inparg = sys.argv[2]
		print(f"dryrun started for {inparg}")
	elif len(sys.argv) == 1:
		print("input directory or file to scan")
		sys.exit()
	else:
		inparg = sys.argv[1]

	# check if inparg is file or directory, handle
	if os.path.isfile(inparg):
		# isfile() already implies the path exists, no second check needed
		movie_files.append(inparg)
	elif os.path.isdir(inparg):
		work_path = inparg
	else:
		# Neither an existing file nor a directory: stop here instead of
		# carrying on with nothing to process (and creating an empty dryrun.txt).
		print(f"Error: couldn't parse {inparg}")
		sys.exit(1)

	if dryrun:
		# work_path is '' when a single file was given, so the log then lands in the current working directory
		dryrun_log = open(os.path.join(work_path, 'dryrun.txt'), 'w', encoding='utf-8')

	# collect the movies when a directory was passed as arg
	if work_path:
		print(f"Searching movies in path: {work_path}")
		# Exactly one directory level deep. The root is escaped so that characters
		# such as '[' in the given path are not interpreted as glob syntax.
		movie_files = glob.glob(os.path.join(glob.escape(work_path), '*', '*.mp4'))

	# "<title> (year)" naming, e.g. "Name of the movie (2022) AAC.whateverelse".
	# The century alternation is grouped so that 19xx years match as well as 20xx.
	paren_year_re = re.compile(r'(.*)\(((?:19|20)\d{2})\)')
	# dotted release naming, e.g. "Name.Of.The.Movie.2022.1080p.WEB"
	dotted_year_re = re.compile(r'^(?P<Name>.+?)(?!\.[12]\d\d\d\.\d{,3}[ip]\.)\.(?P<Year>\d\d\d\d)\.(?P<Resolution>[^.]+)\.(?P<Format>[^.]+)')

	for movie_file in movie_files:

		fpath, fbase = os.path.split(os.path.abspath(movie_file))	# file directory, file name
		fname = os.path.splitext(fbase)[0]	# filename without extension

		extras_dir = os.path.join(fpath, "extras")

		# an existing extras dir means this movie is skipped; its contents are not inspected
		if os.path.isdir(extras_dir):
			continue

		# see if we can extract the title and year from filename, otherwise just use the filename
		# without extension (though this makes for worse search results/trailer matches)
		title = fname
		year = ''
		match = paren_year_re.match(fname)
		if match is not None:
			# drop the blank that separates the title from "(year)"
			title = match.group(1).strip()
			year = match.group(2)
		else:
			# try another regex
			match = dotted_year_re.search(fname)
			if match is not None:
				title = match.group('Name').replace('.', ' ')
				year = match.group('Year')
		if title and year:
			print(f"title: {title}, year: {year}")

		# find link using youtubesearchpython
		query_txt = title
		if year:
			query_txt += f' {year}'
		query = f'{query_txt} trailer'

		link = ''
		try:
			resroot = youtubesearchpython.VideosSearch(query, limit = 1).result()
		except Exception as exc:
			# one failed lookup should not abort the remaining movies
			print(f"Error searching for {query}: {exc}")
			resroot = None
		if resroot and resroot.get('result'):
			link = resroot['result'][0].get('link') or ''

		if not link:
			msg = f"Couldn't find a trailer for {movie_file}"
			print(msg)
			if dryrun:
				dryrun_log.write(msg + '\n')
			continue

		if dryrun:
			# a dry run only records what would be downloaded
			dryrun_log.write(f'file: {fname} query: {query} link: {link}\n')
			print(f"dryrun: {query} {link}")
			continue

		# title/year are strings (possibly empty), never None, so test for emptiness
		final_name = f'{title} ({year})' if title and year else fname

		# file out name; yt_dlp reads 'outtmpl' as a %-style template, so literal '%' must be doubled
		fout = os.path.join(extras_dir, final_name + '-trailer.mp4').replace('%', '%%')

		ydl_opts = {
			'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]',
			'default_search': 'auto',
			'restrictfilenames': True,
			'prefer_ffmpeg': True,
			'quiet': True,
			'no_warnings': True,
			'ignoreerrors': True,
			'noplaylist': True,
			'noprogress': True,
			'outtmpl': fout,
			'max_filesize': max_trailer_sz
		}

		print(f"Downloading trailer for {movie_file}")
		error = ''
		try:
			os.makedirs(extras_dir, exist_ok=True)
			with yt_dlp.YoutubeDL(ydl_opts) as ydl:
				# with 'ignoreerrors' set, failures come back as a non-zero return code instead of an exception
				if ydl.download([link]) != 0:
					error = 'yt_dlp reported a download error'
				elif not os.listdir(extras_dir):
					# presumably skipped by yt_dlp (e.g. over max_filesize) - TODO confirm
					error = 'no file was written'
		except Exception as exc:
			error = str(exc) or type(exc).__name__
		if error:
			print(f"Error downloading trailer for {final_name}: {error}")
			# an extras dir left behind would make this movie be skipped on every later run
			shutil.rmtree(extras_dir, ignore_errors=True)

	if dryrun:
		dryrun_log.close()