Last active
December 4, 2024 23:36
-
-
Save reteps/b829b5da1ef7d22ce18178233f48fd3a to your computer and use it in GitHub Desktop.
Fetch full-res MP4 and Captions from Kaltura owned by you, using SSO Microsoft signin.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import re | |
import html | |
import os | |
import sys | |
from typing import List, Optional | |
# Root URL of the MediaSpace site; fetch_media uses it as the Referer and as
# the prefix for the login, my-media and edit pages.
KALTURA_BASE = 'https://mediaspace.illinois.edu/'

# Passed as `apiVersion` on the api_v3 caption requests.
API_VERSION = '3.1'

# Player library version: appears in the html5lib download path and in the
# `clientTag` (kwidget:<version>) sent with the api_v3 requests.
CLIENT_VERSION = 'v2.101'

# Player UI configuration id placed in the download.php URL.
UICONF_ID = '41571891'
def fetch_media(username: str, password: str) -> List[dict]:
    """Sign in to Kaltura MediaSpace via Microsoft SSO and list the user's media.

    The function logs in with the given credentials, requests the first page
    of the "my-media" listing, and for every entry on that page resolves:

    * ``fullResDownloadUrl`` -- the ``Location`` header returned by the
      Kaltura ``download.php`` endpoint (the redirect is not followed), and
    * ``zoomTranscript`` -- the URL of the caption asset labelled
      ``Zoom_TRANSCRIPT``, or ``None`` when the entry has no such asset.

    Args:
        username: Login name submitted to the Microsoft sign-in form.
        password: Password submitted to the Microsoft sign-in form.

    Returns:
        The entry dicts from the my-media listing, each extended with the two
        keys above and with ``relatedObjects`` removed. An empty list is
        returned when the listing contains no media.

    Raises:
        SystemExit: If the my-media page does not contain the expected
            listing data (most likely because sign-in failed).
        AttributeError: If one of the intermediate login pages does not match
            the patterns scraped below (``re.search`` returns ``None``).
    """
    headers = {
        'Referer': KALTURA_BASE,
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
    }
    s = requests.Session()
    print(f'Signing into Kaltura as {username}...')

    # The login page embeds a JSON `Config=` blob; its `urlPost` is the next
    # hop on login.microsoftonline.com, which in turn serves a second blob
    # carrying the context and flow token needed for the credential POST.
    response = s.get(f'{KALTURA_BASE}/user/login', headers=headers)
    config = json.loads(re.search(r'Config=({.*})', response.text).group(1))
    response = s.get('https://login.microsoftonline.com' + config['urlPost'])
    config = json.loads(re.search(r'Config=({.*})', response.text).group(1))
    response = s.post('https://login.microsoftonline.com' + config['urlPost'], data={
        "login": username,
        "passwd": password,
        "ctx": config['sCtx'],
        "flowToken": config['sFT'],
    })

    # Two auto-submit SAML forms follow. Note that the hidden fields come in
    # a different order in each form (SAMLResponse/RelayState, then
    # RelayState/SAMLResponse), hence the different unpacking order.
    url, body, relay = re.search(r'action="(.*?)".*?value="(.*?)".*?RelayState.*?value="(.*?)"', response.text, re.DOTALL).groups()
    response = s.post(url, data={'SAMLResponse': body, "RelayState": relay})
    url, relay, body = re.search(r'action="(.*?)"(?:.|\n)*?value="(.*?)"(?:.|\n)*?value="(.*?)"', response.text, re.DOTALL).groups()
    s.post(html.unescape(url), data={'SAMLResponse': body, "RelayState": html.unescape(relay)})

    # for debugging, uncomment this and use a valid Kaltura cookie.
    # s = requests.Session()
    # s.cookies.set('kms_ctamuls', "75br1kn1n7ctjs7siqj2moolbm")

    # Only page 1 of the listing is requested.
    body = {"controller": "user", "action": "user-media", "page": "1"}
    res = s.post(f'{KALTURA_BASE}/my-media', json=body)
    listing = re.search(r'MyMediaPage,\s+({.*?})\)', res.text)
    if listing is None:
        print('Failed to get info -- sign in most likely failed')
        # The original called os.exit(), which does not exist; sys.exit() is
        # the intended way to abort with a non-zero status.
        sys.exit(1)
    info = json.loads(listing.group(1))

    if not info['data']:
        # Nothing to fetch, and no entry to borrow a session token from.
        print('No media found')
        return []

    print('Getting a valid session')
    # pull out a KS value from a flashvars session
    first = s.get(f'{KALTURA_BASE}/edit/' + info['data'][0]['entry']['id'])
    ks, = re.search(r'var flashvars = {"ks":"(.*?)"', first.text).groups()

    entries = []
    for media in info['data']:
        entry = media['entry']
        print('Fetching video for', entry['id'])

        # Do not follow the redirect: the Location header *is* the download URL.
        resp = requests.get(
            f'https://cdnapisec.kaltura.com/html5/html5lib/{CLIENT_VERSION}/modules/KalturaSupport/download.php/wid/_{entry["partnerId"]}/uiconf_id/{UICONF_ID}/entry_id/{entry["id"]}',
            params={
                'forceDownload': 'true',
                'downloadName': entry['name'],
                'preferredBitrate': 0,
                'ks': ks,
            },
            allow_redirects=False,
        )
        entry['fullResDownloadUrl'] = resp.headers['Location']

        # List the entry's caption assets and look for the Zoom transcript.
        resp = requests.get(
            'https://cdnapisec.kaltura.com/api_v3/index.php',
            params={
                'service': 'caption_captionasset',
                'apiVersion': API_VERSION,
                'expiry': '86400',
                'clientTag': f'kwidget:{CLIENT_VERSION}',
                'format': '1',
                'ignoreNull': '1',
                'action': 'list',
                'filter:objectType': 'KalturaAssetFilter',
                'filter:entryIdEqual': entry["id"],
                'filter:statusEqual': 2,
                'pager:pageSize': 50,
                'ks': ks,
            },
        )
        caption_id = None
        for o in resp.json()['objects']:
            if o['label'] == 'Zoom_TRANSCRIPT':
                caption_id = o['id']
                break

        if caption_id:
            print('Fetching captions for', entry['id'])
            resp = requests.get(
                'https://cdnapisec.kaltura.com/api_v3/index.php',
                params={
                    'service': 'multirequest',
                    'apiVersion': API_VERSION,
                    'expiry': '86400',
                    'clientTag': f'kwidget:{CLIENT_VERSION}',
                    'format': '1',
                    'ignoreNull': '1',
                    'action': 'null',
                    '1:ks': ks,
                    '1:service': 'caption_captionasset',
                    '1:action': 'getUrl',
                    '1:id': caption_id,
                },
            )
            # The multirequest response is a list; its first element is the
            # result of the single getUrl sub-request.
            entry['zoomTranscript'] = resp.json()[0]
        else:
            entry['zoomTranscript'] = None

        # Drop the related-objects payload from the result; tolerate entries
        # that do not carry it.
        entry.pop('relatedObjects', None)
        entries.append(entry)
    return entries
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment