Skip to content

Instantly share code, notes, and snippets.

@reteps
Last active December 4, 2024 23:36
Show Gist options
  • Save reteps/b829b5da1ef7d22ce18178233f48fd3a to your computer and use it in GitHub Desktop.
Save reteps/b829b5da1ef7d22ce18178233f48fd3a to your computer and use it in GitHub Desktop.
Fetch full-resolution MP4 download URLs and caption URLs for the Kaltura media you own, signing in through Microsoft SSO.
import requests
import json
import re
import html
import os
import sys
from typing import List, Optional
# Root URL of the Kaltura MediaSpace site (note the trailing slash). Sent as
# the Referer header and used as the prefix of the login, my-media and edit
# page URLs requested by fetch_media.
KALTURA_BASE = 'https://mediaspace.illinois.edu/'
# Version segment of the html5lib download URL; also sent as the
# `kwidget:<version>` clientTag on api_v3 requests.
CLIENT_VERSION = 'v2.101'
# Value placed in the `uiconf_id` segment of the download URL.
UICONF_ID = '41571891'
# Sent as the `apiVersion` parameter on api_v3 requests.
API_VERSION = '3.1'
def _extract_login_config(page_text):
    """Parse the JSON object assigned to ``Config=`` in a login page's HTML."""
    return json.loads(re.search(r'Config=({.*})', page_text).group(1))


def _find_zoom_caption_id(entry_id, ks):
    """Return the id of the entry's ``Zoom_TRANSCRIPT`` caption asset, or None."""
    resp = requests.get(
        'https://cdnapisec.kaltura.com/api_v3/index.php',
        params={
            'service': 'caption_captionasset',
            'apiVersion': API_VERSION,
            'expiry': '86400',
            'clientTag': f'kwidget:{CLIENT_VERSION}',
            'format': '1',
            'ignoreNull': '1',
            'action': 'list',
            'filter:objectType': 'KalturaAssetFilter',
            'filter:entryIdEqual': entry_id,
            'filter:statusEqual': 2,
            'pager:pageSize': 50,
            'ks': ks,
        })
    for asset in resp.json()['objects']:
        # An asset is not guaranteed to carry a label; skip it instead of
        # aborting the whole scan with a KeyError.
        if asset.get('label') == 'Zoom_TRANSCRIPT':
            return asset['id']
    return None


def _fetch_caption_url(caption_id, ks):
    """Ask the caption asset service for the download URL of one caption."""
    resp = requests.get(
        'https://cdnapisec.kaltura.com/api_v3/index.php',
        params={
            'service': 'multirequest',
            'apiVersion': API_VERSION,
            'expiry': '86400',
            'clientTag': f'kwidget:{CLIENT_VERSION}',
            'format': '1',
            'ignoreNull': '1',
            'action': 'null',
            '1:ks': ks,
            '1:service': 'caption_captionasset',
            '1:action': 'getUrl',
            '1:id': caption_id,
        },
    )
    # A multirequest answers with a list; the single sub-request is item 0.
    return resp.json()[0]


def fetch_media(username, password):
    """Sign in to Kaltura MediaSpace and collect download links for "My Media".

    Walks the Microsoft SSO / SAML form hand-off, reads the first page of the
    user's media list, and for every entry resolves the redirect target of the
    full-resolution download plus, when present, the URL of the caption asset
    labelled ``Zoom_TRANSCRIPT``.

    Args:
        username: Login name posted to the Microsoft sign-in form.
        password: Password posted to the Microsoft sign-in form.

    Returns:
        A list of entry dicts as found in the my-media page data, each extended
        with ``fullResDownloadUrl`` and ``zoomTranscript`` (URL or ``None``) and
        with ``relatedObjects`` removed. Empty when the user has no media.

    Raises:
        SystemExit: If the my-media page data cannot be found (most likely a
            failed sign-in).
        AttributeError: If one of the intermediate login pages does not contain
            the expected form/config markup.
    """
    # KALTURA_BASE ends with '/', and every path below starts with '/';
    # strip it once so the request URLs do not contain a double slash.
    base = KALTURA_BASE.rstrip('/')
    headers = {
        'Referer': KALTURA_BASE,
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
    }
    s = requests.Session()
    print(f'Signing into Kaltura as {username}...')

    # The login page redirects to Microsoft; each page embeds a `Config=`
    # JSON blob carrying the next URL and the flow tokens to post back.
    response = s.get(f'{base}/user/login', headers=headers)
    config = _extract_login_config(response.text)
    response = s.get('https://login.microsoftonline.com' + config['urlPost'])
    config = _extract_login_config(response.text)
    response = s.post('https://login.microsoftonline.com' + config['urlPost'], data={
        "login": username,
        "passwd": password,
        "ctx": config['sCtx'],
        "flowToken": config['sFT'],
    })

    # Two auto-submit HTML forms follow; replay each by scraping its action
    # URL and hidden field values.
    url, body, relay = re.search(
        r'action="(.*?)".*?value="(.*?)".*?RelayState.*?value="(.*?)"',
        response.text, re.DOTALL).groups()
    response = s.post(url, data={'SAMLResponse': body, "RelayState": relay})
    # In this second form the first hidden value is posted as RelayState and
    # the second as SAMLResponse.
    url, relay, body = re.search(
        r'action="(.*?)"(?:.|\n)*?value="(.*?)"(?:.|\n)*?value="(.*?)"',
        response.text, re.DOTALL).groups()
    s.post(html.unescape(url), data={'SAMLResponse': body, "RelayState": html.unescape(relay)})

    # for debugging, uncomment this and use a valid Kaltura cookie.
    # s = requests.Session()
    # s.cookies.set('kms_ctamuls', "75br1kn1n7ctjs7siqj2moolbm")
    body = {"controller": "user", "action": "user-media", "page": "1"}
    res = s.post(f'{base}/my-media', json=body)
    page_data = re.search(r'MyMediaPage,\s+({.*?})\)', res.text)
    if page_data is None:
        print('Failed to get info -- sign in most likely failed')
        # `os` has no `exit`; sys.exit is what actually terminates here.
        sys.exit(1)
    info = json.loads(page_data.group(1))

    media_items = info['data']
    if not media_items:
        # Nothing to fetch, and no entry to borrow a session token from.
        return []

    print('Getting a valid session')
    # pull out a KS value from a flashvars session
    first = s.get(f'{base}/edit/' + media_items[0]['entry']['id'])
    ks, = re.search(r'var flashvars = {"ks":"(.*?)"', first.text).groups()

    entries = []
    for media in media_items:
        entry = media['entry']
        print('Fetching video for', entry['id'])
        # Redirects are not followed: the Location header is the download URL.
        resp = requests.get(
            f'https://cdnapisec.kaltura.com/html5/html5lib/{CLIENT_VERSION}/modules/KalturaSupport/download.php/wid/_{entry["partnerId"]}/uiconf_id/{UICONF_ID}/entry_id/{entry["id"]}',
            params={
                'forceDownload': 'true',
                'downloadName': entry['name'],
                'preferredBitrate': 0,
                'ks': ks,
            },
            allow_redirects=False)
        entry['fullResDownloadUrl'] = resp.headers['Location']

        caption_id = _find_zoom_caption_id(entry['id'], ks)
        if caption_id:
            print('Fetching captions for', entry['id'])
            entry['zoomTranscript'] = _fetch_caption_url(caption_id, ks)
        else:
            entry['zoomTranscript'] = None

        # Tolerate entries that were delivered without this key.
        entry.pop('relatedObjects', None)
        entries.append(entry)
    return entries
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment