Skip to content

Instantly share code, notes, and snippets.

@henryjfry
Last active August 11, 2025 09:23
Show Gist options
  • Save henryjfry/8da2b90aa4a4ef09110625a56b2367c7 to your computer and use it in GitHub Desktop.
Save henryjfry/8da2b90aa4a4ef09110625a56b2367c7 to your computer and use it in GitHub Desktop.
IMDb trailer lookup
import json
import requests
import time
def get_imdb_videos(imdb_id):
    """Fetch all trailer videos for an IMDb title from IMDb's GraphQL endpoint.

    The first page is requested with the ``TitleVideoGallerySubPage``
    operation (which also returns the title text and plot); further pages
    are requested with ``TitleVideoGalleryPagination`` until the API reports
    no next page.

    Args:
        imdb_id: IMDb title id, e.g. ``'tt4532368'``.

    Returns:
        A list of video node dicts as returned by the API (``id``,
        ``contentType``, ``name``, ``runtime``, ``thumbnail``,
        ``primaryTitle``), each extended with ``plot``, ``total`` and
        ``item_title`` keys. The list is empty when the response carries no
        title or no videos.

    Raises:
        requests.HTTPError: if a request returns an HTTP error status.
        requests.RequestException: on transport errors or timeout.
    """
    import re
    import requests

    API_URL = "https://graphql.prod.api.imdb.a2z.com/"
    HEADERS = {
        'Referer': 'https://www.imdb.com/',
        'Origin': 'https://www.imdb.com',
        'User-Agent': 'Mozilla/5.0'
    }

    def gqlmin(q):
        # Drop every run of four spaces so the indented query text below is
        # sent without its source-code indentation.
        return re.sub(' {4}', '', q)

    query_subpage = '''
    query TitleVideoGallerySubPage(
        $const: ID!,
        $first: Int!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            titleText { text }
            plot { plotText { plainText } }
            videoStrip(first: $first, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''
    query_pagination = '''
    query TitleVideoGalleryPagination(
        $const: ID!,
        $first: Int!,
        $after: ID!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            videoStrip(first: $first, after: $after, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''
    fragment = '''
    fragment VideoGalleryItems on VideoConnection {
        pageInfo {
            endCursor
            hasNextPage
        }
        total
        edges {
            node {
                id
                contentType { id }
                name { value }
                runtime { value }
                thumbnail { url }
                primaryTitle {
                    series {
                        displayableEpisodeNumber {
                            displayableSeason {
                                season
                            }
                        }
                        series {
                            titleText { text }
                        }
                    }
                }
            }
        }
    }
    '''

    variables = {
        "const": imdb_id,
        "first": 50,
        "filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
        "sort": {"by": "DATE", "order": "DESC"}
    }

    def fetch_title(operation_name, query):
        """POST one GraphQL operation and return its ``data.title`` object."""
        pdata = {
            'operationName': operation_name,
            'query': gqlmin(query + fragment),
            'variables': variables
        }
        r = requests.post(API_URL, headers=HEADERS, json=pdata, timeout=30)
        r.raise_for_status()
        # GraphQL answers with explicit nulls for absent objects, and
        # dict.get(key, {}) returns None for those, so use "or {}" instead.
        body = r.json() or {}
        return (body.get('data') or {}).get('title') or {}

    # First page: also carries the title text and plot.
    title_data = fetch_title("TitleVideoGallerySubPage", query_subpage)
    plot_text = ((title_data.get('plot') or {}).get('plotText') or {}).get('plainText') or ""
    item_title = (title_data.get('titleText') or {}).get('text') or ""
    video_data = title_data.get('videoStrip') or {}
    total_videos = video_data.get('total')

    videos = []
    while True:
        videos.extend((edge or {}).get('node') or {} for edge in video_data.get('edges') or [])
        page_info = video_data.get('pageInfo') or {}
        cursor = page_info.get('endCursor')
        if not (page_info.get('hasNextPage') and cursor):
            break
        if cursor == variables.get("after"):
            # The API handed back the cursor we just used; stop instead of
            # requesting the same page forever.
            break
        variables["after"] = cursor
        time.sleep(0.3)  # small pause between page requests
        video_data = fetch_title("TitleVideoGalleryPagination", query_pagination).get('videoStrip') or {}

    # Match old output: inject plot, total, and item_title
    for v in videos:
        v["plot"] = plot_text
        v["total"] = total_videos
        v["item_title"] = item_title
    return videos
def time_format(seconds: int) -> str:
    """Render a duration given in seconds as a compact ``D/H/m/s`` string.

    The largest non-zero unit decides the layout, e.g. ``65`` becomes
    ``'01m 05s'`` and ``3600`` becomes ``'01H 00m 00s'``. ``None`` and a
    zero-length duration both yield ``'-'``.
    """
    if seconds is None:
        return '-'

    days, remainder = divmod(int(seconds), 3600 * 24)
    hours, remainder = divmod(remainder, 3600)
    minutes, secs = divmod(remainder, 60)

    if days > 0:
        return '{:02d}D {:02d}H {:02d}m {:02d}s'.format(days, hours, minutes, secs)
    if hours > 0:
        return '{:02d}H {:02d}m {:02d}s'.format(hours, minutes, secs)
    if minutes > 0:
        return '{:02d}m {:02d}s'.format(minutes, secs)
    if secs > 0:
        return '{:02d}s'.format(secs)
    return '-'
import re
def extract_season_number(title):
    """Return the season number mentioned in a trailer title, or ``None``.

    Looks for the word "Season" or "Series" (case-insensitive) followed by
    optional whitespace and digits, e.g. ``"Season 2"`` or ``"series3"``.
    When several numbered mentions occur, the last one wins. A later
    "Season"/"Series" word that carries no number (as in
    ``"Season 2 | Limited Series Trailer"``) no longer hides an earlier
    numbered one.

    Args:
        title: the video title; non-string values give ``None``.

    Returns:
        The season number as ``int``, or ``None`` when none is found.
    """
    if not isinstance(title, str):
        return None
    numbers = re.findall(r"(?:Season|Series)\s*(\d+)", title, re.IGNORECASE)
    return int(numbers[-1]) if numbers else None
def find_best_trailer(trailer_list, season_number=None):
    """Pick the most suitable trailer from a list of IMDb video nodes.

    Only nodes whose ``contentType.id`` is
    ``'amzn1.imdb.video.contenttype.trailer'`` are considered, ordered by
    runtime, longest first. Each one gets a season (from the node's episode
    metadata, else parsed from its title), a ``theatrical`` mark (title
    contains "theatrical", "full" or "final" and does not mention "season")
    and an ``official`` mark (title contains "official" and the trailer is
    not tied to a season).

    Selection:
      * If ``season_number`` is given and a trailer for that season exists,
        the longest such trailer is returned; otherwise the longest trailer
        of the closest earlier season.
      * If no season can be matched, the first hit among the following is
        returned: a theatrical trailer; else an official trailer
        (non-teaser preferred); else a trailer named exactly like the
        series. If none of those exist, the longest trailer is returned.

    Args:
        trailer_list: video node dicts (``id``, ``contentType``, ``name``,
            ``runtime``, ``thumbnail``, ``primaryTitle``).
        season_number: season to match, or ``None`` for a show/movie trailer.

    Returns:
        A dict with keys ``id``, ``vid_url``, ``season``, ``title``,
        ``theatrical``, ``official``, ``thumbnail``, ``runtime`` and
        ``time``, or ``None`` when the list holds no trailer-type video.
    """
    if not trailer_list:
        return None

    trailer_type = 'amzn1.imdb.video.contenttype.trailer'
    theatrical_words = ('theatrical', 'full', 'final')

    def runtime_value(item):
        # "runtime" (or its "value") may be null in the API response.
        return (item.get('runtime') or {}).get('value')

    candidates = []
    season_list = []
    official_flag = False
    theatrical_flag = False
    title_text = None

    # Longest first; entries without a runtime sort last instead of crashing.
    ordered = sorted(trailer_list, key=lambda item: runtime_value(item) or 0, reverse=True)
    for trailer in ordered:
        if (trailer.get('contentType') or {}).get('id') != trailer_type:
            continue

        # Season from the episode metadata; reset for every trailer so a
        # value from a previous iteration can never leak into this one.
        series = (trailer.get('primaryTitle') or {}).get('series') or {}
        try:
            season = int(series['displayableEpisodeNumber']['displayableSeason']['season'])
        except (KeyError, TypeError, ValueError):
            season = None
        if season:
            parent_title = ((series.get('series') or {}).get('titleText') or {}).get('text')
            title_text = parent_title or title_text

        title = (trailer.get('name') or {}).get('value')
        if not season and isinstance(title, str):
            # Fall back to a "Season N" / "Series N" mention in the title.
            season = extract_season_number(title) or season

        lowered = str(title).lower()
        # "... The Final Season" is a season clip, not a theatrical/final
        # trailer, so any title mentioning "season" is excluded here.
        theatrical = any(word in lowered for word in theatrical_words) and 'season' not in lowered
        # A season-specific "official" trailer does not count as the show's
        # official trailer, and must not clear a flag set by another trailer.
        official = 'official' in lowered and not season
        theatrical_flag = theatrical_flag or theatrical
        official_flag = official_flag or official

        if season and season not in season_list:
            season_list.append(season)

        runtime = runtime_value(trailer)
        candidates.append({
            'id': trailer['id'],
            'vid_url': 'https://www.imdb.com/video/%s/?ref_=ttvg_vi_1' % (str(trailer['id'])),
            'season': season,
            'title': title,
            'theatrical': theatrical,
            'official': official,
            'thumbnail': (trailer.get('thumbnail') or {}).get('url'),
            'runtime': runtime,
            'time': time_format(runtime),
        })

    if not candidates:
        return None

    # Which season to serve: the requested one, else the closest earlier one.
    target_season = None
    if season_number:
        if season_number in season_list:
            target_season = season_number
        else:
            earlier = [s for s in season_list if s <= season_number]
            if earlier:
                target_season = max(earlier)

    if target_season is not None:
        return next(c for c in candidates if c['season'] == target_season)

    # No season match: look for a show-level trailer.
    show_trailer = None
    if theatrical_flag:
        show_trailer = next((c for c in candidates if c['theatrical']), None)
    elif official_flag:
        show_trailer = next(
            (c for c in candidates if c['official'] and 'teaser' not in str(c['title']).lower()),
            None,
        )
        if not show_trailer:
            show_trailer = next((c for c in candidates if c['official']), None)
    elif title_text:
        show_trailer = next((c for c in candidates if c['title'] == title_text), None)

    # Default to the longest trailer when nothing more specific was found.
    return show_trailer or candidates[0]
def extract_imdb_mp4_url(video_id):
    """Resolve a playable stream URL for an IMDb video id by scraping its page.

    Downloads ``https://www.imdb.com/video/<video_id>`` and parses the
    ``"playbackURLs"`` JSON array embedded in the HTML.

    Args:
        video_id: IMDb video id such as ``'vi1020905497'``.

    Returns:
        A ``(url, entry)`` tuple: the URL and the full playback entry of the
        first item whose ``videoMimeType`` is ``'MP4'``. When there is no MP4
        entry, the first entry with a non-empty URL is returned instead;
        ``(None, None)`` when the array is empty.

    Raises:
        Exception: if the page request does not return HTTP 200.
        ValueError: if the page holds no parseable ``"playbackURLs"`` array.
    """
    page_url = f"https://www.imdb.com/video/{video_id}?ref_=ttvg_vi_26"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    response = requests.get(page_url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch page: {response.status_code}")
    html = response.text

    marker = '"playbackURLs":['
    marker_pos = html.find(marker)
    if marker_pos == -1:
        raise ValueError(f"No playbackURLs found on page for {video_id}")
    array_start = marker_pos + len(marker) - 1  # index of the opening '['

    # The page content is untrusted, so parse it as JSON rather than eval().
    # raw_decode() reads exactly one JSON value starting at array_start and
    # ignores whatever follows it in the HTML.
    try:
        playback_urls, _ = json.JSONDecoder().raw_decode(html, array_start)
    except ValueError as err:
        raise ValueError(f"Could not parse playbackURLs for {video_id}") from err

    url = None
    video = None
    for entry in playback_urls:
        if entry['videoMimeType'] == 'MP4':
            return entry['url'], entry
        if not url:
            # Remember a non-MP4 stream as fallback.
            url = entry['url']
            video = entry
    return url, video
# Demo run: look up the trailers for one title, pick the best one and
# resolve its stream URL.
all_videos = get_imdb_videos(imdb_id='tt4532368')
#print(all_videos)
best_trailer = find_best_trailer(all_videos, season_number=None)
if best_trailer:
    print(best_trailer, best_trailer['title'], sep='\n')
    video_url, video = extract_imdb_mp4_url(best_trailer['id'])
    print("MP4 URL:", video_url)
    print(video)
exit()

# NOTE: everything below is unreachable because of the exit() above; it is
# kept as debugging output that can be enabled by removing that call.
#print(json.dumps(all_videos[:3], indent=2)) # Show first 3 videos
print(f"Total videos fetched: {len(all_videos)}")
print(all_videos)
for entry in all_videos:
    if 'contenttype.trailer' in str(entry) and 'season' in str(entry['name']['value']).lower():
        print(entry['name']['value'],' - ' ,time_format(entry['runtime']['value']))
@henryjfry
Copy link
Author

henryjfry commented Aug 6, 2025

# Usage example: fetch the trailers for a title, choose one, then resolve
# its playable URL.
all_videos = get_imdb_videos(imdb_id='tt4532368')
best_trailer = find_best_trailer(all_videos, season_number=None)

if best_trailer:
	print(best_trailer, best_trailer['title'], sep='\n')
	video_url, video = extract_imdb_mp4_url(best_trailer['id'])
	print("MP4 URL:", video_url)
	print(video)

@matthuisman
Copy link

Found a small issue when a trailer title contains "Final" but is for the final season,
e.g. Better Call Saul: "Better Call Saul: A Look At The Final Season".
It gets picked up as a theatrical trailer.
To stop this, I just added a second check that the title doesn't contain "season".

@henryjfry
Copy link
Author

Aye, it definitely needs tweaking. If you figure out some ironclad logic to filter down to a series match if it's there, or a show trailer if not, then let me know.

Although that's probably what to do, movie trailer, show trailer and season trailer if needed.

@Gujal00
Copy link

Gujal00 commented Aug 9, 2025

Thanks to the pointers about TitleVideoGallerySubPage and TitleVideoGalleryPagination I have reverse engineered the raw query and implemented in Gujal00/Kodi-Official@72bab2d

You can now use the following in STRM files to test

Severance:
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt11280740 will play latest available trailer
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt11280740&season=3 will play the S3 teaser as there is no trailer yet

Game of Thrones: (Good test as IMDB has 303 videos and you have search through those for season matching)
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt0944947&season=1
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt0944947&season=7

Better Call Saul:
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt3032476&season=6

@henryjfry
Copy link
Author

henryjfry commented Aug 9, 2025

Thanks to the pointers about TitleVideoGallerySubPage and TitleVideoGalleryPagination I have reverse engineered the raw query and implemented in Gujal00/Kodi-Official@72bab2d

How did you do that?
I have a couple of other persisted queries I'd like to make more permanent: AdvancedTitleSearch and ListsPage.
But I'm not a Java or web guy, so without significant help from ChatGPT to look at the JS for me I'm pretty lost.

EDIT:

Actually i was able to figure it out myself when i looked at your updated code and saw the "fragment VideoGalleryItems on VideoConnection" part. Knowing that my original queries worked in the different format on the other endpoint helped a lot.

I was then able to look where i found the original persisted queries i was working with and gather enough information to get copilot to produce versions which could be run without being "persistedQuery", after not too much prompting.

Although to get it to actually listen to my query and not do something random, I had to write a very long prompt in a text file and upload it: this function works, this function works differently, this is the original version of the updated function which was a persistedQuery, here are multiple JS files, etc.
And i had to include the entire contents of the JS in line too because the references were across multiple JS files and it was getting confused if i uploaded them separately.
So if you are working with copilot yourself at all for further JS inspection i suggest you put all your queries in line in a text file, seems more reliable that way. I'd prefer to use chatgpt and it seems more reliable but i run out of tokens pretty quickly and i only get copilot because we have microsoft 365 with work.

@Gujal00
Copy link

Gujal00 commented Aug 9, 2025

I stay away from AI coding and I am not a programmer by profession either. Just trial and error in Python at hobbyist level to get things going :)

Ideally I would like to find out the nameConstraints or titleConstraints needed to return only trailers rather than all videos, but I haven't been able to figure that out yet. Maybe fellow Kiwi @matthuisman has some ideas.

@henryjfry
Copy link
Author

henryjfry commented Aug 10, 2025

Yeah I don't do too much Ai coding either but as I don't know JavaScript at all I never would have been able to figure out how to make a compliant query without it.
But I started out trial and error in the terminal too.
Although I now do programming adjacent stuff for work. We don't really use AI as there are GDPR data issues involved but the copilot thing is new and is our own instance I believe so we've been curious about the capabilities and trialing it a bit in our area.
But I mostly do SQL on random data so ai isn't much help as half the thing is figuring out what you are looking at.

But when I am doing random things it is pretty handy to now have access to an interactive stack overflow on steroids.
Does it hallucinate occasionally, yes. But can it parse badly documented/undocumented code and give good info back, yes surprisingly it can. And getting working example code relevant to your problem when you go googling is often half the battle so it's definitely a valuable tool.

FYI the video properties returned contain contentType which has a trailer/clip info like "amzn1.imdb.video.contenttype.trailer" which might be what you need?

Otherwise maybe the query has those as variables inputs?
They might be called nameSearchConstraints?
That difference (IE search) was an error I saw myself (did you mean...)

@Gujal00
Copy link

Gujal00 commented Aug 10, 2025

Yes I already tried contenttype as a filter key and it came with incorrect parameter response, so yeah that is exactly what I am targeting and yet to figure out

@henryjfry
Copy link
Author

Think this should be what you need:

"filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},

import json
import requests
import time

def get_imdb_videos(imdb_id):
	"""Fetch all trailer videos for an IMDb title from IMDb's GraphQL endpoint.

	The first page is requested with the ``TitleVideoGallerySubPage``
	operation (which also returns the title text and plot); further pages
	are requested with ``TitleVideoGalleryPagination`` until the API reports
	no next page.

	Args:
		imdb_id: IMDb title id, e.g. ``'tt11280740'``.

	Returns:
		A list of video node dicts as returned by the API (``id``,
		``contentType``, ``name``, ``runtime``, ``thumbnail``,
		``primaryTitle``), each extended with ``plot``, ``total`` and
		``item_title`` keys. The list is empty when the response carries no
		title or no videos.

	Raises:
		requests.HTTPError: if a request returns an HTTP error status.
		requests.RequestException: on transport errors or timeout.
	"""
	import re, requests
	API_URL = "https://graphql.prod.api.imdb.a2z.com/"
	HEADERS = {
		'Referer': 'https://www.imdb.com/',
		'Origin': 'https://www.imdb.com',
		'User-Agent': 'Mozilla/5.0'
	}

	def gqlmin(q):
		# Drops every run of four spaces from the query text.
		return re.sub(' {4}', '', q)

	query_subpage = '''
	query TitleVideoGallerySubPage(
		$const: ID!,
		$first: Int!,
		$filter: VideosQueryFilter,
		$sort: VideoSort
	) {
		title(id: $const) {
			titleText { text }
			plot { plotText { plainText } }
			videoStrip(first: $first, filter: $filter, sort: $sort) {
				...VideoGalleryItems
			}
		}
	}
	'''
	query_pagination = '''
	query TitleVideoGalleryPagination(
		$const: ID!,
		$first: Int!,
		$after: ID!,
		$filter: VideosQueryFilter,
		$sort: VideoSort
	) {
		title(id: $const) {
			videoStrip(first: $first, after: $after, filter: $filter, sort: $sort) {
				...VideoGalleryItems
			}
		}
	}
	'''
	fragment = '''
	fragment VideoGalleryItems on VideoConnection {
		pageInfo {
			endCursor
			hasNextPage
		}
		total
		edges {
			node {
				id
				contentType { id }
				name { value }
				runtime { value }
				thumbnail { url }
				primaryTitle {
					series {
						displayableEpisodeNumber {
							displayableSeason {
								season
							}
						}
						series {
							titleText { text }
						}
					}
				}
			}
		}
	}
	'''

	variables = {
		"const": imdb_id,
		"first": 50,
		"filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
		"sort": {"by": "DATE", "order": "DESC"}
	}

	def fetch_title(operation_name, query):
		"""POST one GraphQL operation and return its ``data.title`` object."""
		pdata = {
			'operationName': operation_name,
			'query': gqlmin(query + fragment),
			'variables': variables
		}
		r = requests.post(API_URL, headers=HEADERS, json=pdata, timeout=30)
		r.raise_for_status()
		# GraphQL answers with explicit nulls for absent objects, and
		# dict.get(key, {}) returns None for those, so use "or {}" instead.
		body = r.json() or {}
		return (body.get('data') or {}).get('title') or {}

	# First page: also carries the title text and plot.
	title_data = fetch_title("TitleVideoGallerySubPage", query_subpage)
	plot_text = ((title_data.get('plot') or {}).get('plotText') or {}).get('plainText') or ""
	item_title = (title_data.get('titleText') or {}).get('text') or ""
	video_data = title_data.get('videoStrip') or {}
	total_videos = video_data.get('total')

	videos = []
	while True:
		videos.extend((edge or {}).get('node') or {} for edge in video_data.get('edges') or [])
		page_info = video_data.get('pageInfo') or {}
		cursor = page_info.get('endCursor')
		if not (page_info.get('hasNextPage') and cursor):
			break
		if cursor == variables.get("after"):
			# The API handed back the cursor we just used; stop instead of
			# requesting the same page forever.
			break
		variables["after"] = cursor
		time.sleep(0.3)  # small pause between page requests
		video_data = fetch_title("TitleVideoGalleryPagination", query_pagination).get('videoStrip') or {}

	# Match old output: inject plot, total, and item_title
	for v in videos:
		v["plot"] = plot_text
		v["total"] = total_videos
		v["item_title"] = item_title

	return videos




# Demo run: fetch the trailer list for one title, print the raw result, then
# stop the interpreter so nothing after this point is executed.
all_videos = get_imdb_videos(imdb_id='tt11280740')
print(all_videos)
exit()

@henryjfry
Copy link
Author

However, other than "types", the nameConstraints look like this: "nameConstraints":{"allNameIds":["nm0004395","nm3138882"]}

And titleconstraints:

nameConstraints: {
                    allNameIds: r.nameIds?.sort( (e, t) => e.localeCompare(t))
                },
                titleConstraints: {
                    anyTitleIds: r.titleIds?.sort( (e, t) => e.localeCompare(t))

i.e. anyTitleIds takes IMDb title IDs (e.g. tt11280740), so these constraints are not actually a "video clip name" or "video clip title".

@henryjfry
Copy link
Author

Found a small issue when a trailer title contains "Final" but is for the final season, e.g. Better Call Saul: "Better Call Saul: A Look At The Final Season". It gets picked up as a theatrical trailer. To stop this, I just added a second check that the title doesn't contain "season".

@Gujal00, @matthuisman

checkout Gujal00/Kodi-Official@c556e4e

I've provided working API lookups for all the pages currently scraped:

VideoPlayback => https://www.imdb.com/video/vi1020905497/?ref_=ttvg_vi_1
CalendarPage => https://www.imdb.com/calendar/?region=US&type=MOVIE&ref_=rlm
movies_near_you => https://www.imdb.com/showtimes/

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment