-
-
Save henryjfry/8da2b90aa4a4ef09110625a56b2367c7 to your computer and use it in GitHub Desktop.
import json | |
import requests | |
import time | |
def get_imdb_videos(imdb_id):
    """Fetch every trailer-type video listed for an IMDb title.

    The first request (``TitleVideoGallerySubPage``) also returns the title
    text and plot. Further pages are requested with
    ``TitleVideoGalleryPagination`` for as long as the API reports a next page.

    Args:
        imdb_id: IMDb title id, e.g. ``'tt4532368'``.

    Returns:
        A list of video node dicts as returned by the API. Each node gets three
        extra keys injected: ``plot``, ``total`` and ``item_title``.

    Raises:
        requests.HTTPError: if a request returns an error status.
        requests.Timeout: if a request exceeds the timeout.
    """
    import re
    import time

    import requests

    API_URL = "https://graphql.prod.api.imdb.a2z.com/"
    HEADERS = {
        'Referer': 'https://www.imdb.com/',
        'Origin': 'https://www.imdb.com',
        'User-Agent': 'Mozilla/5.0'
    }
    REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection blocks forever

    def gqlmin(q):
        # Cheap minifier: drop the 4-space indentation units of the query text.
        return re.sub(' {4}', '', q)

    def as_dict(value):
        # The API may answer with an explicit null (e.g. "title": null for an
        # unknown id); dict.get's default does not cover that case.
        return value if isinstance(value, dict) else {}

    query_subpage = '''
    query TitleVideoGallerySubPage(
        $const: ID!,
        $first: Int!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            titleText { text }
            plot { plotText { plainText } }
            videoStrip(first: $first, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''
    query_pagination = '''
    query TitleVideoGalleryPagination(
        $const: ID!,
        $first: Int!,
        $after: ID!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            videoStrip(first: $first, after: $after, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''
    fragment = '''
    fragment VideoGalleryItems on VideoConnection {
        pageInfo {
            endCursor
            hasNextPage
        }
        total
        edges {
            node {
                id
                contentType { id }
                name { value }
                runtime { value }
                thumbnail { url }
                primaryTitle {
                    series {
                        displayableEpisodeNumber {
                            displayableSeason {
                                season
                            }
                        }
                        series {
                            titleText { text }
                        }
                    }
                }
            }
        }
    }
    '''
    variables = {
        "const": imdb_id,
        "first": 50,
        "filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
        "sort": {"by": "DATE", "order": "DESC"}
    }

    def fetch_title(operation_name, query, request_variables):
        # POST one GraphQL operation and return its ``data.title`` object.
        payload = {
            'operationName': operation_name,
            'query': gqlmin(query + fragment),
            'variables': request_variables
        }
        r = requests.post(API_URL, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        return as_dict(as_dict(as_dict(r.json()).get('data')).get('title'))

    def nodes_of(strip):
        # Unwrap the connection's edges into plain node dicts.
        return [as_dict(as_dict(edge).get('node')) for edge in (strip.get('edges') or [])]

    # First page: also carries the title text and plot.
    title_data = fetch_title("TitleVideoGallerySubPage", query_subpage, variables)
    plot_text = as_dict(as_dict(title_data.get('plot')).get('plotText')).get('plainText', "")
    item_title = as_dict(title_data.get('titleText')).get('text', "")
    video_data = as_dict(title_data.get('videoStrip'))
    total_videos = video_data.get('total')
    videos = nodes_of(video_data)
    page_info = as_dict(video_data.get('pageInfo'))
    cursor = page_info.get('endCursor')
    has_next = page_info.get('hasNextPage', False)

    # Pagination loop
    while has_next and cursor:
        page_variables = dict(variables, after=cursor)
        title_data = fetch_title("TitleVideoGalleryPagination", query_pagination, page_variables)
        video_data = as_dict(title_data.get('videoStrip'))
        videos.extend(nodes_of(video_data))
        page_info = as_dict(video_data.get('pageInfo'))
        next_cursor = page_info.get('endCursor')
        # Stop if the cursor does not advance, otherwise this would spin forever.
        has_next = page_info.get('hasNextPage', False) and next_cursor != cursor
        cursor = next_cursor
        time.sleep(0.3)  # small pause between page requests

    # Match old output: inject plot, total, and item_title
    for v in videos:
        v["plot"] = plot_text
        v["total"] = total_videos
        v["item_title"] = item_title
    return videos
def time_format(seconds: int) -> str:
    """Format a duration in seconds as a short human-readable string.

    Only the units needed are shown, e.g. ``'59s'``, ``'02m 30s'``,
    ``'01H 00m 00s'`` or ``'01D 01H 01m 01s'``.

    Args:
        seconds: Duration in seconds. Anything ``int()`` accepts is allowed;
            ``None`` is also accepted.

    Returns:
        The formatted duration, or ``'-'`` when ``seconds`` is ``None``, zero
        or negative.

    Raises:
        ValueError: if ``seconds`` cannot be converted with ``int()``.
    """
    if seconds is None:
        return '-'
    total = int(seconds)
    # Floor division on a negative value would yield nonsense such as
    # '23H 59m 55s' for -5, so treat non-positive durations as "unknown".
    if total <= 0:
        return '-'
    m, s = divmod(total, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    if d > 0:
        return '{:02d}D {:02d}H {:02d}m {:02d}s'.format(d, h, m, s)
    if h > 0:
        return '{:02d}H {:02d}m {:02d}s'.format(h, m, s)
    if m > 0:
        return '{:02d}m {:02d}s'.format(m, s)
    return '{:02d}s'.format(s)
import re | |
def extract_season_number(title):
    """Return the season number mentioned in a video title, if any.

    Looks for "Season" or "Series" (case-insensitive) followed by optional
    whitespace and digits, e.g. ``"Season 2 Trailer"`` -> ``2``. When several
    numbered mentions exist, the last one wins.

    Args:
        title: Video title text.

    Returns:
        The season number as ``int``, or ``None`` when the title is not a
        string or contains no numbered season/series mention.
    """
    if not isinstance(title, str):
        return None
    numbers = re.findall(r"(?:Season|Series)\s*(\d+)", title, re.IGNORECASE)
    return int(numbers[-1]) if numbers else None
def find_best_trailer(trailer_list, season_number=None):
    """Pick the most suitable trailer from a list of IMDb video nodes.

    Only nodes whose ``contentType.id`` is the IMDb trailer type are
    considered, longest runtime first. Selection rules:

    * If ``season_number`` is given and season trailers exist, the trailer for
      that season is returned, or failing that the trailer for the closest
      earlier season.
    * Otherwise a "show" trailer is preferred, in this order: a theatrical
      trailer (title contains "theatrical", "full" or "final" and does not
      mention a season), an official non-season trailer (non-teaser first), or
      a trailer whose title equals the series title.
    * If none of those exist, the longest trailer is returned.

    Args:
        trailer_list: Video node dicts as produced by ``get_imdb_videos``.
        season_number: Optional season to match (int or numeric string).

    Returns:
        A dict with the keys ``id``, ``vid_url``, ``season``, ``title``,
        ``theatrical``, ``official``, ``thumbnail``, ``runtime`` and ``time``,
        or ``None`` when the input contains no trailer.
    """
    TRAILER_TYPE = 'amzn1.imdb.video.contenttype.trailer'
    THEATRICAL_WORDS = ('theatrical', 'full', 'final')

    if not trailer_list:
        return None

    # Accept a season passed as a string; anything unusable means "no season".
    try:
        season_number = int(season_number) if season_number is not None else None
    except (TypeError, ValueError):
        season_number = None

    def runtime_of(video):
        # A missing/null runtime sorts last instead of breaking the sort.
        value = (video.get('runtime') or {}).get('value')
        return value if isinstance(value, (int, float)) else 0

    candidates = []
    seasons_seen = []
    official_flag = False
    theatrical_flag = False
    series_title = None

    for video in sorted(trailer_list, key=runtime_of, reverse=True):
        if (video.get('contentType') or {}).get('id') != TRAILER_TYPE:
            continue

        # Season as reported by the API; recomputed for every trailer so a
        # value from a previous iteration can never leak into this one.
        series = (video.get('primaryTitle') or {}).get('series') or {}
        try:
            season = int(series['displayableEpisodeNumber']['displayableSeason']['season'])
        except (KeyError, TypeError, ValueError):
            season = None

        title = (video.get('name') or {}).get('value') or ''
        title_lower = str(title).lower()
        entry_season = season
        if season:
            try:
                series_title = series['series']['titleText']['text']
            except (KeyError, TypeError):
                pass
        else:
            # Fall back to a season number spelled out in the title.
            season = extract_season_number(title)
            if season:
                entry_season = season

        # "... The Final Season" contains "final" but is a season video, not a
        # theatrical trailer, so titles mentioning a season are excluded.
        is_theatrical = (
            any(word in title_lower for word in THEATRICAL_WORDS)
            and 'season' not in title_lower
        )
        # Season trailers never count as the show's official trailer. The flag
        # is only ever raised, so the result does not depend on sort order.
        is_official = 'official' in title_lower and not season
        theatrical_flag = theatrical_flag or is_theatrical
        official_flag = official_flag or is_official

        if season and season not in seasons_seen:
            seasons_seen.append(season)

        runtime_value = (video.get('runtime') or {}).get('value')
        candidates.append({
            'id': video.get('id'),
            'vid_url': 'https://www.imdb.com/video/%s/?ref_=ttvg_vi_1' % (str(video.get('id'))),
            'season': entry_season,
            'title': title,
            'theatrical': is_theatrical,
            'official': is_official,
            'thumbnail': (video.get('thumbnail') or {}).get('url'),
            'runtime': runtime_value,
            'time': time_format(runtime_value),
        })

    if not candidates:
        return None

    # Season to serve: the requested one, else the closest earlier season.
    target_season = None
    if season_number and seasons_seen:
        if season_number in seasons_seen:
            target_season = season_number
        else:
            earlier = [s for s in seasons_seen if s <= season_number]
            if earlier:
                target_season = max(earlier)

    def first(predicate):
        return next((c for c in candidates if predicate(c)), None)

    if target_season is None:
        season_trailer = candidates[0]  # longest trailer as the default
    else:
        season_trailer = first(lambda c: c['season'] == target_season)

    if theatrical_flag:
        show_trailer = first(lambda c: c['theatrical'])
    elif official_flag:
        show_trailer = (
            first(lambda c: c['official'] and 'teaser' not in str(c['title']).lower())
            or first(lambda c: c['official'])
        )
    elif series_title:
        show_trailer = first(lambda c: c['title'] == series_title)
    else:
        show_trailer = None

    # The show trailer wins when no season was matched.
    if show_trailer and (
        target_season is None or (official_flag and season_trailer is None)
    ):
        season_trailer = show_trailer
    return season_trailer
def extract_imdb_mp4_url(video_id):
    """Scrape the playback URL for an IMDb video from its video page.

    The page embeds a ``"playbackURLs":[...]`` JSON array. The first entry
    whose ``videoMimeType`` is ``'MP4'`` is preferred; otherwise the first
    entry with a URL is used.

    Args:
        video_id: IMDb video id (the ``id`` of a video node).

    Returns:
        A ``(url, entry)`` tuple where ``entry`` is the chosen playback dict,
        or ``(None, None)`` when the array is empty.

    Raises:
        Exception: if the page request does not return HTTP 200.
        ValueError: if the page has no ``playbackURLs`` data or it is not
            valid JSON.
    """
    import json

    url = f"https://www.imdb.com/video/{video_id}?ref_=ttvg_vi_26"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch page: {response.status_code}")
    html = response.text

    marker = '"playbackURLs":['
    start = html.find(marker)
    if start == -1:
        raise ValueError(f"No playbackURLs data found for video {video_id}")
    # SECURITY: this used to eval() text scraped from the page, which would
    # execute arbitrary code from a remote source. The data is JSON, so decode
    # it as JSON; raw_decode stops at the end of the array and ignores the
    # rest of the page.
    array_start = start + len(marker) - 1  # index of the opening '['
    playback_urls, _ = json.JSONDecoder().raw_decode(html[array_start:])

    fallback_url = None
    fallback_entry = None
    for entry in playback_urls:
        if entry.get('videoMimeType') == 'MP4':
            return entry['url'], entry
        if not fallback_url:
            # Remember the first non-MP4 entry in case no MP4 exists.
            fallback_url = entry.get('url')
            fallback_entry = entry
    return fallback_url, fallback_entry
# Manual smoke test: fetch the trailers for one title, pick the best one and
# print its resolved playback URL.
all_videos = get_imdb_videos(imdb_id='tt4532368')
#print(all_videos)
best_trailer = find_best_trailer(all_videos, season_number=None)
if best_trailer:
    print(best_trailer)
    print(best_trailer['title'])
    video_url, video = extract_imdb_mp4_url(best_trailer['id'])
    print("MP4 URL:", video_url)
    print(video)
# NOTE(review): indentation was lost in this copy; the exit is assumed to be
# top-level, which makes everything below unreachable -- confirm.
# SystemExit is raised directly so the script does not depend on the
# site-injected exit() helper.
raise SystemExit

# Leftover debugging output (unreachable while the exit above is in place).
#print(json.dumps(all_videos[:3], indent=2)) # Show first 3 videos
print(f"Total videos fetched: {len(all_videos)}")
print(all_videos)
for i in all_videos:
    if 'contenttype.trailer' in str(i) and 'season' in str(i['name']['value']).lower():
        print(i['name']['value'],' - ' ,time_format(i['runtime']['value']))
I found a small issue when a trailer title contains "Final" but is for the final season,
e.g. Better Call Saul's "Better Call Saul: A Look At The Final Season".
It gets picked up as the theatrical trailer.
To stop this, I just added a second check that the title doesn't contain "season".
Aye it definitely needs tweaking, if you figure out some iron clad logic to filter down to a series match if it's there or a show trailer if not then let me know.
Although that's probably what to do, movie trailer, show trailer and season trailer if needed.
Thanks to the pointers about TitleVideoGallerySubPage
and TitleVideoGalleryPagination
I have reverse engineered the raw query and implemented in Gujal00/Kodi-Official@72bab2d
You can now use the following in STRM files to test
Severance:
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt11280740
will play latest available trailer
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt11280740&season=3
will play the S3 teaser as there is no trailer yet
Game of Thrones: (Good test as IMDB has 303 videos and you have search through those for season matching)
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt0944947&season=1
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt0944947&season=7
Better Call Saul:
plugin://plugin.video.imdb.trailers/?action=play_id&imdb=tt3032476&season=6
Thanks to the pointers about
TitleVideoGallerySubPage
and TitleVideoGalleryPagination
I have reverse engineered the raw query and implemented in Gujal00/Kodi-Official@72bab2d
How did you do that?
I have a couple of other persisted queries I'd like to make more permanent: AdvancedTitleSearch and ListsPage.
But I'm not a Java or web guy, so without significant help from ChatGPT to look at the JS for me I'm pretty lost.
EDIT:
Actually i was able to figure it out myself when i looked at your updated code and saw the "fragment VideoGalleryItems on VideoConnection" part. Knowing that my original queries worked in the different format on the other endpoint helped a lot.
I was then able to look where i found the original persisted queries i was working with and gather enough information to get copilot to produce versions which could be run without being "persistedQuery", after not too much prompting.
Although to get it to actually listen to my query and not do something random, I had to write a very long prompt in a text file and upload it: this function works, this function works differently, this is the original version of the updated function which was a persistedQuery, here are multiple JS files, etc.
And i had to include the entire contents of the JS in line too because the references were across multiple JS files and it was getting confused if i uploaded them separately.
So if you are working with copilot yourself at all for further JS inspection i suggest you put all your queries in line in a text file, seems more reliable that way. I'd prefer to use chatgpt and it seems more reliable but i run out of tokens pretty quickly and i only get copilot because we have microsoft 365 with work.
I stay away from AI coding and I am not a programmer by profession either. Just trial and error in Python at hobbyist level to get things going :)
Ideally I would like to find out the nameConstraints or titleConstraints that filter the results down to trailers only rather than all videos, but I haven't been able to figure that out yet; maybe fellow Kiwi @matthuisman has some ideas.
Yeah I don't do too much Ai coding either but as I don't know JavaScript at all I never would have been able to figure out how to make a compliant query without it.
But I started out trial and error in the terminal too.
Although I now do programming adjacent stuff for work. We don't really use AI as there are GDPR data issues involved but the copilot thing is new and is our own instance I believe so we've been curious about the capabilities and trialing it a bit in our area.
But I mostly do SQL on random data so ai isn't much help as half the thing is figuring out what you are looking at.
But when I am doing random things it is pretty handy to now have access to an interactive stack overflow on steroids.
Does it hallucinate occasionally, yes. But can it parse badly documented/undocumented code and give good info back, yes surprisingly it can. And getting working example code relevant to your problem when you go googling is often half the battle so it's definitely a valuable tool.
FYI the video properties returned contain contentType which has a trailer/clip info like "amzn1.imdb.video.contenttype.trailer" which might be what you need?
Otherwise maybe the query has those as variables inputs?
They might be called nameSearchConstraints?
That difference (IE search) was an error I saw myself (did you mean...)
Yes I already tried contenttype as a filter key and it came with incorrect parameter response, so yeah that is exactly what I am targeting and yet to figure out
Think this should be what you need:
"filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
import json
import requests
import time
def get_imdb_videos(imdb_id):
    """Fetch every trailer-type video listed for an IMDb title.

    The first request (``TitleVideoGallerySubPage``) also returns the title
    text and plot. Further pages are requested with
    ``TitleVideoGalleryPagination`` for as long as the API reports a next page.

    Args:
        imdb_id: IMDb title id, e.g. ``'tt11280740'``.

    Returns:
        A list of video node dicts as returned by the API. Each node gets three
        extra keys injected: ``plot``, ``total`` and ``item_title``.

    Raises:
        requests.HTTPError: if a request returns an error status.
        requests.Timeout: if a request exceeds the timeout.
    """
    import re
    import time

    import requests

    API_URL = "https://graphql.prod.api.imdb.a2z.com/"
    HEADERS = {
        'Referer': 'https://www.imdb.com/',
        'Origin': 'https://www.imdb.com',
        'User-Agent': 'Mozilla/5.0'
    }
    REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection blocks forever

    def gqlmin(q):
        # Cheap minifier: drop the 4-space indentation units of the query text.
        return re.sub(' {4}', '', q)

    def as_dict(value):
        # The API may answer with an explicit null (e.g. "title": null for an
        # unknown id); dict.get's default does not cover that case.
        return value if isinstance(value, dict) else {}

    query_subpage = '''
    query TitleVideoGallerySubPage(
        $const: ID!,
        $first: Int!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            titleText { text }
            plot { plotText { plainText } }
            videoStrip(first: $first, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''
    query_pagination = '''
    query TitleVideoGalleryPagination(
        $const: ID!,
        $first: Int!,
        $after: ID!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            videoStrip(first: $first, after: $after, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''
    fragment = '''
    fragment VideoGalleryItems on VideoConnection {
        pageInfo {
            endCursor
            hasNextPage
        }
        total
        edges {
            node {
                id
                contentType { id }
                name { value }
                runtime { value }
                thumbnail { url }
                primaryTitle {
                    series {
                        displayableEpisodeNumber {
                            displayableSeason {
                                season
                            }
                        }
                        series {
                            titleText { text }
                        }
                    }
                }
            }
        }
    }
    '''
    variables = {
        "const": imdb_id,
        "first": 50,
        "filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
        "sort": {"by": "DATE", "order": "DESC"}
    }

    def fetch_title(operation_name, query, request_variables):
        # POST one GraphQL operation and return its ``data.title`` object.
        payload = {
            'operationName': operation_name,
            'query': gqlmin(query + fragment),
            'variables': request_variables
        }
        r = requests.post(API_URL, headers=HEADERS, json=payload, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        return as_dict(as_dict(as_dict(r.json()).get('data')).get('title'))

    def nodes_of(strip):
        # Unwrap the connection's edges into plain node dicts.
        return [as_dict(as_dict(edge).get('node')) for edge in (strip.get('edges') or [])]

    # First page: also carries the title text and plot.
    title_data = fetch_title("TitleVideoGallerySubPage", query_subpage, variables)
    plot_text = as_dict(as_dict(title_data.get('plot')).get('plotText')).get('plainText', "")
    item_title = as_dict(title_data.get('titleText')).get('text', "")
    video_data = as_dict(title_data.get('videoStrip'))
    total_videos = video_data.get('total')
    videos = nodes_of(video_data)
    page_info = as_dict(video_data.get('pageInfo'))
    cursor = page_info.get('endCursor')
    has_next = page_info.get('hasNextPage', False)

    # Pagination loop
    while has_next and cursor:
        page_variables = dict(variables, after=cursor)
        title_data = fetch_title("TitleVideoGalleryPagination", query_pagination, page_variables)
        video_data = as_dict(title_data.get('videoStrip'))
        videos.extend(nodes_of(video_data))
        page_info = as_dict(video_data.get('pageInfo'))
        next_cursor = page_info.get('endCursor')
        # Stop if the cursor does not advance, otherwise this would spin forever.
        has_next = page_info.get('hasNextPage', False) and next_cursor != cursor
        cursor = next_cursor
        time.sleep(0.3)  # small pause between page requests

    # Match old output: inject plot, total, and item_title
    for v in videos:
        v["plot"] = plot_text
        v["total"] = total_videos
        v["item_title"] = item_title
    return videos
# Manual check: fetch the video list for one title, print it and stop.
all_videos = get_imdb_videos(imdb_id='tt11280740')
print(all_videos)
exit()
However, other than "types", the nameConstraints look like "nameConstraints":{"allNameIds":["nm0004395","nm3138882"]}
And titleconstraints:
nameConstraints: {
allNameIds: r.nameIds?.sort( (e, t) => e.localeCompare(t))
},
titleConstraints: {
anyTitleIds: r.titleIds?.sort( (e, t) => e.localeCompare(t))
i.e. anyTitleIds are IMDb ids (e.g. tt11280740), so they are not actually a "video clip name" or "video clip title".
I found a small issue when a trailer title contains "Final" but is for the final season, e.g. Better Call Saul's "Better Call Saul: A Look At The Final Season". It gets picked up as the theatrical trailer. To stop this, I just added a second check that the title doesn't contain "season".
checkout Gujal00/Kodi-Official@c556e4e
I've provided working API lookups for all the pages currently scraped:
VideoPlayback => https://www.imdb.com/video/vi1020905497/?ref_=ttvg_vi_1
CalendarPage => https://www.imdb.com/calendar/?region=US&type=MOVIE&ref_=rlm
movies_near_you => https://www.imdb.com/showtimes/
Uh oh!
There was an error while loading. Please reload this page.