henryjfry · August 11, 2025 09:23 · henryjfry · Aug 6, 2025 · matthuisman · Aug 7, 2025
diff --git a/imdb_video_list.py b/imdb_video_list.py
 import json
 import requests
 import time

 def get_imdb_videos(imdb_id, timeout=30):
 	"""Fetch every trailer video IMDb's GraphQL endpoint lists for a title.

 	The first page is requested with the ``TitleVideoGallerySubPage`` query;
 	further pages use ``TitleVideoGalleryPagination`` for as long as the
 	response reports ``hasNextPage`` together with an ``endCursor``.

 	Args:
 		imdb_id: Title id sent as the ``$const`` query variable
 			(e.g. ``'tt4532368'``).
 		timeout: Seconds handed to ``requests.post`` for each request so a
 			stalled connection cannot hang the caller forever.

 	Returns:
 		A list of video node dicts (the fields selected by the
 		``VideoGalleryItems`` fragment). Each dict additionally gets the
 		keys ``"plot"``, ``"total"`` and ``"item_title"`` taken from the
 		first page. Null nodes are skipped.

 	Raises:
 		requests.HTTPError: If any request returns an error status
 			(via ``raise_for_status``).
 	"""
 	import re
 	import requests
 	API_URL = "https://graphql.prod.api.imdb.a2z.com/"
 	HEADERS = {
 		'Referer': 'https://www.imdb.com/',
 		'Origin': 'https://www.imdb.com',
 		'User-Agent': 'Mozilla/5.0'
 	}

 	def gqlmin(q):
 		# Only strips runs of four spaces; tab-indented query text is sent as is.
 		return re.sub(' {4}', '', q)

 	def child(parent, key):
 		# GraphQL answers with explicit nulls for missing objects, so a plain
 		# .get(key, {}) default is not enough to keep chained lookups safe.
 		return (parent or {}).get(key) or {}

 	def nodes_of(strip):
 		# Drop null edges/nodes so every returned item is a dict that can
 		# receive the extra keys injected at the end.
 		return [edge['node'] for edge in (strip.get('edges') or []) if edge and edge.get('node')]

 	query_subpage = '''
 	query TitleVideoGallerySubPage(
 		$const: ID!,
 		$first: Int!,
 		$filter: VideosQueryFilter,
 		$sort: VideoSort
 	) {
 		title(id: $const) {
 			titleText { text }
 			plot { plotText { plainText } }
 			videoStrip(first: $first, filter: $filter, sort: $sort) {
 				...VideoGalleryItems
 			}
 		}
 	}
 	'''
 	query_pagination = '''
 	query TitleVideoGalleryPagination(
 		$const: ID!,
 		$first: Int!,
 		$after: ID!,
 		$filter: VideosQueryFilter,
 		$sort: VideoSort
 	) {
 		title(id: $const) {
 			videoStrip(first: $first, after: $after, filter: $filter, sort: $sort) {
 				...VideoGalleryItems
 			}
 		}
 	}
 	'''
 	fragment = '''
 	fragment VideoGalleryItems on VideoConnection {
 		pageInfo {
 			endCursor
 			hasNextPage
 		}
 		total
 		edges {
 			node {
 				id
 				contentType { id }
 				name { value }
 				runtime { value }
 				thumbnail { url }
 				primaryTitle {
 					series {
 						displayableEpisodeNumber {
 							displayableSeason {
 								season
 							}
 						}
 						series {
 							titleText { text }
 						}
 					}
 				}
 			}
 		}
 	}
 	'''

 	variables = {
 		"const": imdb_id,
 		"first": 50,
 		"filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
 		"sort": {"by": "DATE", "order": "DESC"}
 	}

 	def fetch_title(operation_name, query):
 		# Post one GraphQL operation and return its data.title object ({} if null).
 		pdata = {
 			'operationName': operation_name,
 			'query': gqlmin(query + fragment),
 			'variables': variables
 		}
 		r = requests.post(API_URL, headers=HEADERS, json=pdata, timeout=timeout)
 		r.raise_for_status()
 		return child(child(r.json(), 'data'), 'title')

 	# First page: also carries the title text and plot.
 	title_data = fetch_title("TitleVideoGallerySubPage", query_subpage)
 	plot_text = child(child(title_data, 'plot'), 'plotText').get('plainText', "")
 	item_title = child(title_data, 'titleText').get('text', "")

 	video_data = child(title_data, 'videoStrip')
 	total_videos = video_data.get('total')
 	videos = nodes_of(video_data)

 	page_info = child(video_data, 'pageInfo')
 	cursor = page_info.get('endCursor')
 	has_next = page_info.get('hasNextPage', False)

 	# Pagination loop
 	while has_next and cursor:
 		variables["after"] = cursor
 		video_data = child(fetch_title("TitleVideoGalleryPagination", query_pagination), 'videoStrip')
 		videos.extend(nodes_of(video_data))
 		page_info = child(video_data, 'pageInfo')
 		next_cursor = page_info.get('endCursor')
 		has_next = page_info.get('hasNextPage', False)
 		# A cursor that does not advance would request the same page forever.
 		if next_cursor == cursor:
 			break
 		cursor = next_cursor
 		time.sleep(0.3)

 	# Match old output: inject plot, total, and item_title
 	for video in videos:
 		video["plot"] = plot_text
 		video["total"] = total_videos
 		video["item_title"] = item_title

 	return videos

 def time_format(seconds: int) -> str:
 	"""Format a duration given in seconds as a short ``D/H/m/s`` string.

 	The largest non-zero unit decides how many fields are shown, e.g.
 	``61`` -> ``'01m 01s'`` and ``3600`` -> ``'01H 00m 00s'``.

 	Args:
 		seconds: Duration in seconds; anything ``int()`` accepts, or None.

 	Returns:
 		The formatted duration, or ``'-'`` when ``seconds`` is None or no
 		unit is greater than zero.
 	"""
 	if seconds is None:
 		return '-'

 	days, remainder = divmod(int(seconds), 3600 * 24)
 	hours, remainder = divmod(remainder, 3600)
 	minutes, secs = divmod(remainder, 60)

 	if days > 0:
 		return '{:02d}D {:02d}H {:02d}m {:02d}s'.format(days, hours, minutes, secs)
 	if hours > 0:
 		return '{:02d}H {:02d}m {:02d}s'.format(hours, minutes, secs)
 	if minutes > 0:
 		return '{:02d}m {:02d}s'.format(minutes, secs)
 	if secs > 0:
 		return '{:02d}s'.format(secs)
 	return '-'

 import re

 def extract_season_number(title):
 	"""Return the season number mentioned in a trailer title, if any.

 	Looks for "Season" or "Series" (case-insensitive) followed by optional
 	whitespace and digits, e.g. "Season 2" or "series3". If the title has
 	several numbered mentions, the last one is used. A trailing un-numbered
 	"Series"/"Season" word no longer hides an earlier numbered mention.

 	Args:
 		title: Trailer title text.

 	Returns:
 		The season number as an int, or None when ``title`` is not a string
 		or contains no numbered mention.
 	"""
 	if not isinstance(title, str):
 		return None
 	numbers = re.findall(r"(?:Season|Series)\s*(\d+)", title, re.IGNORECASE)
 	return int(numbers[-1]) if numbers else None



 def find_best_trailer(trailer_list, season_number=None):
 	"""Pick the most suitable trailer from a list of IMDb video nodes.

 	Only nodes whose content type id is the IMDb trailer type, and whose
 	``primaryTitle`` has a ``series`` entry, become candidates. Candidates
 	are ordered longest runtime first. Selection then works as follows:

 	* With ``season_number``: prefer a trailer for exactly that season,
 	  otherwise for the nearest earlier season that has a trailer.
 	* Without a season match: take the longest candidate, unless a
 	  "theatrical"/"full"/"final" trailer, an "official" (non-season)
 	  trailer, or a trailer named exactly like the series is found, in
 	  that order of precedence.

 	Args:
 		trailer_list: Video node dicts with the fields selected by the
 			``VideoGalleryItems`` GraphQL fragment.
 		season_number: Season to favour, or None for no preference.

 	Returns:
 		A dict with the keys ``id``, ``vid_url``, ``season``, ``title``,
 		``theatrical``, ``official``, ``thumbnail``, ``runtime`` and
 		``time``, or None when there is no candidate trailer.
 	"""
 	if not trailer_list:
 		return None

 	trailer_type = 'amzn1.imdb.video.contenttype.trailer'
 	theatrical_words = ('theatrical', 'full', 'final')

 	def runtime_of(node):
 		# Runtime can be null in the API response; treat it as 0 so the
 		# sort never compares None with an int.
 		return (node.get('runtime') or {}).get('value') or 0

 	candidates = []
 	seasons = []
 	official_flag = False
 	theatrical_flag = False
 	series_title = None

 	for trailer in sorted(trailer_list, key=runtime_of, reverse=True):
 		if trailer['contentType']['id'] != trailer_type:
 			continue
 		if trailer['primaryTitle'].get('series', {}) == {}:
 			continue

 		try:
 			season = int(trailer['primaryTitle']['series']['displayableEpisodeNumber']['displayableSeason']['season'])
 		except (KeyError, TypeError, ValueError):
 			# No series / season information on this node (e.g. a movie).
 			season = None

 		title = trailer['name']['value']
 		entry = {
 			'id': trailer['id'],
 			'vid_url': 'https://www.imdb.com/video/%s/?ref_=ttvg_vi_1' % (str(trailer['id'])),
 			'season': season,
 			'title': title,
 		}
 		if season:
 			series_title = trailer['primaryTitle']['series']['series']['titleText']['text']
 		else:
 			# Fall back to a season number written in the trailer name.
 			season = extract_season_number(title)
 			if season:
 				entry['season'] = season

 		lowered = str(title).lower()
 		entry['theatrical'] = any(word in lowered for word in theatrical_words)
 		theatrical_flag = theatrical_flag or entry['theatrical']

 		# A season-specific "official" trailer does not count as the title's
 		# general official trailer. The flag only ever switches on, so the
 		# outcome no longer depends on the order the trailers are visited in.
 		entry['official'] = 'official' in lowered and not season
 		official_flag = official_flag or entry['official']

 		if season and season not in seasons:
 			seasons.append(season)

 		runtime_value = (trailer.get('runtime') or {}).get('value')
 		entry['thumbnail'] = (trailer.get('thumbnail') or {}).get('url')
 		entry['runtime'] = runtime_value
 		entry['time'] = time_format(runtime_value)
 		candidates.append(entry)

 	# Nothing survived the trailer filter: there is no "first candidate" to
 	# fall back on, so report no result instead of raising IndexError.
 	if not candidates:
 		return None

 	# Season to search for: the requested one, else the closest earlier one.
 	target_season = None
 	if season_number and seasons:
 		if season_number in seasons:
 			target_season = season_number
 		else:
 			earlier = [s for s in seasons if s <= season_number]
 			if earlier:
 				target_season = max(earlier)

 	if target_season is None:
 		season_trailer = candidates[0]
 	else:
 		season_trailer = next((c for c in candidates if c['season'] == target_season), None)

 	official_trailer = None
 	if theatrical_flag:
 		official_trailer = next((c for c in candidates if c['theatrical']), None)
 	elif official_flag:
 		# Prefer a full official trailer over an official teaser.
 		official_trailer = next(
 			(c for c in candidates if c['official'] and 'teaser' not in str(c['title']).lower()),
 			None,
 		)
 		if not official_trailer:
 			official_trailer = next((c for c in candidates if c['official']), None)
 	elif series_title:
 		official_trailer = next((c for c in candidates if c['title'] == series_title), None)

 	# The general trailer wins when no season matched (or, for official
 	# trailers, when the season lookup came back empty).
 	if official_trailer and (target_season is None or (official_flag and season_trailer is None)):
 		season_trailer = official_trailer
 	return season_trailer

 def extract_imdb_mp4_url(video_id):
 	"""Scrape a playable URL for an IMDb video from its web page.

 	Downloads ``https://www.imdb.com/video/<video_id>`` and reads the
 	``"playbackURLs"`` array embedded in the page source.

 	Args:
 		video_id: IMDb video id inserted into the page URL.

 	Returns:
 		A ``(url, entry)`` tuple: the first entry whose ``videoMimeType``
 		is ``'MP4'``, otherwise the first entry with a non-empty url.
 		``(None, None)`` when the array is empty.

 	Raises:
 		Exception: If the page request does not return HTTP 200.
 		IndexError: If the page contains no ``"playbackURLs"`` array.
 		ValueError: If the extracted array is not valid JSON.
 	"""
 	url = f"https://www.imdb.com/video/{video_id}?ref_=ttvg_vi_26"
 	headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

 	# A timeout keeps a stalled connection from blocking the caller forever.
 	response = requests.get(url, headers=headers, timeout=30)
 	if response.status_code != 200:
 		raise Exception(f"Failed to fetch page: {response.status_code}")

 	_, marker, tail = response.text.partition('"playbackURLs":[')
 	if not marker:
 		# Same exception type the previous split(...)[1] lookup produced.
 		raise IndexError("playbackURLs not found in IMDb video page")
 	playback_json = '[' + tail.split('}]')[0] + '}]'

 	# SECURITY: this text comes from a downloaded web page. It used to be
 	# passed to eval(), which executes arbitrary code and also fails on the
 	# JSON literals true/false/null. Parse it as JSON instead.
 	fallback_url = None
 	fallback_video = None
 	for entry in json.loads(playback_json):
 		if entry['videoMimeType'] == 'MP4':
 			return entry['url'], entry
 		if not fallback_url:
 			fallback_url = entry['url']
 			fallback_video = entry
 	return fallback_url, fallback_video



 # Manual smoke test: fetch the trailers for one title, pick the best one and
 # resolve its playable URL.
 all_videos = get_imdb_videos(imdb_id='tt4532368')
 #print(all_videos)

 best_trailer = find_best_trailer(all_videos, season_number=None)

 if best_trailer:
 	print(best_trailer)
 	print(best_trailer['title'])


 	video_url, video = extract_imdb_mp4_url(best_trailer['id'])
 	print("MP4 URL:", video_url)
 	print(video)
 # exit() is a helper the site module installs for interactive sessions and is
 # not guaranteed to exist (e.g. python -S, embedded interpreters); raising
 # SystemExit is the equivalent that always works.
 raise SystemExit


 # NOTE: everything below is unreachable while the unconditional exit above is
 # in place; it is kept as a debugging aid that lists season trailers.
 #print(json.dumps(all_videos[:3], indent=2))  # Show first 3 videos
 print(f"Total videos fetched: {len(all_videos)}")
 print(all_videos)
 for i in all_videos:
 	if 'contenttype.trailer' in str(i) and 'season' in str(i['name']['value']).lower():
 		print(i['name']['value'],' - ' ,time_format(i['runtime']['value']))
	import json
	import requests
	import time

	def get_imdb_videos(imdb_id, timeout=30):
		"""Fetch every trailer video IMDb's GraphQL endpoint lists for a title.

		The first page is requested with the ``TitleVideoGallerySubPage`` query;
		further pages use ``TitleVideoGalleryPagination`` for as long as the
		response reports ``hasNextPage`` together with an ``endCursor``.

		Args:
			imdb_id: Title id sent as the ``$const`` query variable
				(e.g. ``'tt4532368'``).
			timeout: Seconds handed to ``requests.post`` for each request so a
				stalled connection cannot hang the caller forever.

		Returns:
			A list of video node dicts (the fields selected by the
			``VideoGalleryItems`` fragment). Each dict additionally gets the
			keys ``"plot"``, ``"total"`` and ``"item_title"`` taken from the
			first page. Null nodes are skipped.

		Raises:
			requests.HTTPError: If any request returns an error status
				(via ``raise_for_status``).
		"""
		import re
		import requests
		API_URL = "https://graphql.prod.api.imdb.a2z.com/"
		HEADERS = {
			'Referer': 'https://www.imdb.com/',
			'Origin': 'https://www.imdb.com',
			'User-Agent': 'Mozilla/5.0'
		}

		def gqlmin(q):
			# Only strips runs of four spaces; tab-indented query text is sent as is.
			return re.sub(' {4}', '', q)

		def child(parent, key):
			# GraphQL answers with explicit nulls for missing objects, so a plain
			# .get(key, {}) default is not enough to keep chained lookups safe.
			return (parent or {}).get(key) or {}

		def nodes_of(strip):
			# Drop null edges/nodes so every returned item is a dict that can
			# receive the extra keys injected at the end.
			return [edge['node'] for edge in (strip.get('edges') or []) if edge and edge.get('node')]

		query_subpage = '''
		query TitleVideoGallerySubPage(
			$const: ID!,
			$first: Int!,
			$filter: VideosQueryFilter,
			$sort: VideoSort
		) {
			title(id: $const) {
				titleText { text }
				plot { plotText { plainText } }
				videoStrip(first: $first, filter: $filter, sort: $sort) {
					...VideoGalleryItems
				}
			}
		}
		'''
		query_pagination = '''
		query TitleVideoGalleryPagination(
			$const: ID!,
			$first: Int!,
			$after: ID!,
			$filter: VideosQueryFilter,
			$sort: VideoSort
		) {
			title(id: $const) {
				videoStrip(first: $first, after: $after, filter: $filter, sort: $sort) {
					...VideoGalleryItems
				}
			}
		}
		'''
		fragment = '''
		fragment VideoGalleryItems on VideoConnection {
			pageInfo {
				endCursor
				hasNextPage
			}
			total
			edges {
				node {
					id
					contentType { id }
					name { value }
					runtime { value }
					thumbnail { url }
					primaryTitle {
						series {
							displayableEpisodeNumber {
								displayableSeason {
									season
								}
							}
							series {
								titleText { text }
							}
						}
					}
				}
			}
		}
		'''

		variables = {
			"const": imdb_id,
			"first": 50,
			"filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
			"sort": {"by": "DATE", "order": "DESC"}
		}

		def fetch_title(operation_name, query):
			# Post one GraphQL operation and return its data.title object ({} if null).
			pdata = {
				'operationName': operation_name,
				'query': gqlmin(query + fragment),
				'variables': variables
			}
			r = requests.post(API_URL, headers=HEADERS, json=pdata, timeout=timeout)
			r.raise_for_status()
			return child(child(r.json(), 'data'), 'title')

		# First page: also carries the title text and plot.
		title_data = fetch_title("TitleVideoGallerySubPage", query_subpage)
		plot_text = child(child(title_data, 'plot'), 'plotText').get('plainText', "")
		item_title = child(title_data, 'titleText').get('text', "")

		video_data = child(title_data, 'videoStrip')
		total_videos = video_data.get('total')
		videos = nodes_of(video_data)

		page_info = child(video_data, 'pageInfo')
		cursor = page_info.get('endCursor')
		has_next = page_info.get('hasNextPage', False)

		# Pagination loop
		while has_next and cursor:
			variables["after"] = cursor
			video_data = child(fetch_title("TitleVideoGalleryPagination", query_pagination), 'videoStrip')
			videos.extend(nodes_of(video_data))
			page_info = child(video_data, 'pageInfo')
			next_cursor = page_info.get('endCursor')
			has_next = page_info.get('hasNextPage', False)
			# A cursor that does not advance would request the same page forever.
			if next_cursor == cursor:
				break
			cursor = next_cursor
			time.sleep(0.3)

		# Match old output: inject plot, total, and item_title
		for video in videos:
			video["plot"] = plot_text
			video["total"] = total_videos
			video["item_title"] = item_title

		return videos

	def time_format(seconds: int) -> str:
		"""Format a duration given in seconds as a short ``D/H/m/s`` string.

		The largest non-zero unit decides how many fields are shown, e.g.
		``61`` -> ``'01m 01s'`` and ``3600`` -> ``'01H 00m 00s'``.

		Args:
			seconds: Duration in seconds; anything ``int()`` accepts, or None.

		Returns:
			The formatted duration, or ``'-'`` when ``seconds`` is None or no
			unit is greater than zero.
		"""
		if seconds is None:
			return '-'

		days, remainder = divmod(int(seconds), 3600 * 24)
		hours, remainder = divmod(remainder, 3600)
		minutes, secs = divmod(remainder, 60)

		if days > 0:
			return '{:02d}D {:02d}H {:02d}m {:02d}s'.format(days, hours, minutes, secs)
		if hours > 0:
			return '{:02d}H {:02d}m {:02d}s'.format(hours, minutes, secs)
		if minutes > 0:
			return '{:02d}m {:02d}s'.format(minutes, secs)
		if secs > 0:
			return '{:02d}s'.format(secs)
		return '-'

	import re

	def extract_season_number(title):
		"""Return the season number mentioned in a trailer title, if any.

		Looks for "Season" or "Series" (case-insensitive) followed by optional
		whitespace and digits, e.g. "Season 2" or "series3". If the title has
		several numbered mentions, the last one is used.

		Args:
			title: Trailer title text.

		Returns:
			The season number as an int, or None when ``title`` is not a string
			or contains no numbered mention.
		"""
		if not isinstance(title, str):
			return None
		# The alternation must be a real "|" (an escaped "\|" would only match
		# a literal pipe character) and whitespace before the digits is optional.
		numbers = re.findall(r"(?:Season|Series)\s*(\d+)", title, re.IGNORECASE)
		return int(numbers[-1]) if numbers else None



	def find_best_trailer(trailer_list, season_number=None):
		"""Pick the most suitable trailer from a list of IMDb video nodes.

		Only nodes whose content type id is the IMDb trailer type, and whose
		``primaryTitle`` has a ``series`` entry, become candidates. Candidates
		are ordered longest runtime first. Selection then works as follows:

		* With ``season_number``: prefer a trailer for exactly that season,
		  otherwise for the nearest earlier season that has a trailer.
		* Without a season match: take the longest candidate, unless a
		  "theatrical"/"full"/"final" trailer, an "official" (non-season)
		  trailer, or a trailer named exactly like the series is found, in
		  that order of precedence.

		Args:
			trailer_list: Video node dicts with the fields selected by the
				``VideoGalleryItems`` GraphQL fragment.
			season_number: Season to favour, or None for no preference.

		Returns:
			A dict with the keys ``id``, ``vid_url``, ``season``, ``title``,
			``theatrical``, ``official``, ``thumbnail``, ``runtime`` and
			``time``, or None when there is no candidate trailer.
		"""
		if not trailer_list:
			return None

		trailer_type = 'amzn1.imdb.video.contenttype.trailer'
		theatrical_words = ('theatrical', 'full', 'final')

		def runtime_of(node):
			# Runtime can be null in the API response; treat it as 0 so the
			# sort never compares None with an int.
			return (node.get('runtime') or {}).get('value') or 0

		candidates = []
		seasons = []
		official_flag = False
		theatrical_flag = False
		series_title = None

		for trailer in sorted(trailer_list, key=runtime_of, reverse=True):
			if trailer['contentType']['id'] != trailer_type:
				continue
			if trailer['primaryTitle'].get('series', {}) == {}:
				continue

			try:
				season = int(trailer['primaryTitle']['series']['displayableEpisodeNumber']['displayableSeason']['season'])
			except (KeyError, TypeError, ValueError):
				# No series / season information on this node (e.g. a movie).
				season = None

			title = trailer['name']['value']
			entry = {
				'id': trailer['id'],
				'vid_url': 'https://www.imdb.com/video/%s/?ref_=ttvg_vi_1' % (str(trailer['id'])),
				'season': season,
				'title': title,
			}
			if season:
				series_title = trailer['primaryTitle']['series']['series']['titleText']['text']
			else:
				# Fall back to a season number written in the trailer name.
				season = extract_season_number(title)
				if season:
					entry['season'] = season

			lowered = str(title).lower()
			entry['theatrical'] = any(word in lowered for word in theatrical_words)
			theatrical_flag = theatrical_flag or entry['theatrical']

			# A season-specific "official" trailer does not count as the title's
			# general official trailer. The flag only ever switches on, so the
			# outcome no longer depends on the order the trailers are visited in.
			entry['official'] = 'official' in lowered and not season
			official_flag = official_flag or entry['official']

			if season and season not in seasons:
				seasons.append(season)

			runtime_value = (trailer.get('runtime') or {}).get('value')
			entry['thumbnail'] = (trailer.get('thumbnail') or {}).get('url')
			entry['runtime'] = runtime_value
			entry['time'] = time_format(runtime_value)
			candidates.append(entry)

		# Nothing survived the trailer filter: there is no "first candidate" to
		# fall back on, so report no result instead of raising IndexError.
		if not candidates:
			return None

		# Season to search for: the requested one, else the closest earlier one.
		target_season = None
		if season_number and seasons:
			if season_number in seasons:
				target_season = season_number
			else:
				earlier = [s for s in seasons if s <= season_number]
				if earlier:
					target_season = max(earlier)

		if target_season is None:
			season_trailer = candidates[0]
		else:
			season_trailer = next((c for c in candidates if c['season'] == target_season), None)

		official_trailer = None
		if theatrical_flag:
			official_trailer = next((c for c in candidates if c['theatrical']), None)
		elif official_flag:
			# Prefer a full official trailer over an official teaser.
			official_trailer = next(
				(c for c in candidates if c['official'] and 'teaser' not in str(c['title']).lower()),
				None,
			)
			if not official_trailer:
				official_trailer = next((c for c in candidates if c['official']), None)
		elif series_title:
			official_trailer = next((c for c in candidates if c['title'] == series_title), None)

		# The general trailer wins when no season matched (or, for official
		# trailers, when the season lookup came back empty).
		if official_trailer and (target_season is None or (official_flag and season_trailer is None)):
			season_trailer = official_trailer
		return season_trailer

	def extract_imdb_mp4_url(video_id):
		"""Scrape a playable URL for an IMDb video from its web page.

		Downloads ``https://www.imdb.com/video/<video_id>`` and reads the
		``"playbackURLs"`` array embedded in the page source.

		Args:
			video_id: IMDb video id inserted into the page URL.

		Returns:
			A ``(url, entry)`` tuple: the first entry whose ``videoMimeType``
			is ``'MP4'``, otherwise the first entry with a non-empty url.
			``(None, None)`` when the array is empty.

		Raises:
			Exception: If the page request does not return HTTP 200.
			IndexError: If the page contains no ``"playbackURLs"`` array.
			ValueError: If the extracted array is not valid JSON.
		"""
		url = f"https://www.imdb.com/video/{video_id}?ref_=ttvg_vi_26"
		headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

		# A timeout keeps a stalled connection from blocking the caller forever.
		response = requests.get(url, headers=headers, timeout=30)
		if response.status_code != 200:
			raise Exception(f"Failed to fetch page: {response.status_code}")

		_, marker, tail = response.text.partition('"playbackURLs":[')
		if not marker:
			# Same exception type a split(...)[1] lookup would produce.
			raise IndexError("playbackURLs not found in IMDb video page")
		playback_json = '[' + tail.split('}]')[0] + '}]'

		# SECURITY: this text comes from a downloaded web page. It used to be
		# passed to eval(), which executes arbitrary code and also fails on the
		# JSON literals true/false/null. Parse it as JSON instead.
		fallback_url = None
		fallback_video = None
		for entry in json.loads(playback_json):
			if entry['videoMimeType'] == 'MP4':
				return entry['url'], entry
			if not fallback_url:
				fallback_url = entry['url']
				fallback_video = entry
		return fallback_url, fallback_video



	# Manual smoke test: fetch the trailers for one title, pick the best one and
	# resolve its playable URL.
	all_videos = get_imdb_videos(imdb_id='tt4532368')
	#print(all_videos)

	best_trailer = find_best_trailer(all_videos, season_number=None)

	if best_trailer:
		print(best_trailer)
		print(best_trailer['title'])


		video_url, video = extract_imdb_mp4_url(best_trailer['id'])
		print("MP4 URL:", video_url)
		print(video)
	# exit() is a helper the site module installs for interactive sessions and is
	# not guaranteed to exist (e.g. python -S, embedded interpreters); raising
	# SystemExit is the equivalent that always works.
	raise SystemExit


	# NOTE: everything below is unreachable while the unconditional exit above is
	# in place; it is kept as a debugging aid that lists season trailers.
	#print(json.dumps(all_videos[:3], indent=2)) # Show first 3 videos
	print(f"Total videos fetched: {len(all_videos)}")
	print(all_videos)
	for i in all_videos:
		if 'contenttype.trailer' in str(i) and 'season' in str(i['name']['value']).lower():
			print(i['name']['value'],' - ' ,time_format(i['runtime']['value']))