Created
March 19, 2022 05:13
-
-
Save JeanOlivier/346cfcfdbc92c94a462fcb4017b481ac to your computer and use it in GitHub Desktop.
March Meeting 2022 Live talks downloader. Procedure + code to download the live talks once they're available on demand. Might be useful for many similar livestreams with non-predictable chunk filenames.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*- | |
import os | |
import json | |
import subprocess | |
from base64 import b64decode | |
from haralyzer import HarParser, HarPage | |
from dateutil import parser as dateparser | |
"""
MM2022 video downloader

0. In Firefox: set "devtools.netmonitor.responseBodyLimit" to 0 in "about:config".
1. Load the talk page in Firefox, but don't start the stream yet.
2. Open the dev tools in Firefox with ctrl+shift+c, browse to the Network tab, and set "Persist Logs" to True in the Network tab settings.
3. Start the stream and quickly set the resolution to the desired one (e.g. 720p).
4. Play the whole talk (you can do that at 2x speed and on mute).
5. Once it's done:
    1. Search the Network tab for "m4a".
    2. Right-click the first file ---> Save All As HAR.
    3. Save as "*session*_audio.har".
6. Then:
    1. Search the Network tab for "m4v".
    2. Right-click the first file ---> Save All As HAR.
    3. Save as "*session*_video.har".
7. Execute `extract_session(*session*)`.
"""
def parse_har_file(har_file, output_file):
    """Rebuild a media file from the response bodies stored in a HAR dump.

    The entries of ``har_file`` are ordered by their ``startedDateTime`` and
    the base64-decoded body of each one is appended, in that order, to
    ``output_file``.

    Parameters
    ----------
    har_file : str
        Path of the HAR file to read.
    output_file : str
        Path of the binary file to write. It is overwritten if it exists.

    Notes
    -----
    * Each chunk is written starting 4 bytes before its first ``ftyp``
      marker, which drops the custom header the server prepends. A chunk
      without such a marker (or with the marker in its first 4 bytes) is
      written unmodified instead of being truncated.
    * Entries whose response carries no body text are skipped.
    """
    # Parsing HAR file dump.
    # 'utf-8-sig' reads plain UTF-8 and also tolerates a leading BOM, so the
    # result does not depend on the platform's default encoding.
    with open(har_file, 'r', encoding='utf-8-sig') as f:
        har_parser = HarParser(json.load(f))
    # The actual data
    data = har_parser.har_data
    # Sorting entries by the time at which they were downloaded
    sorted_entries = sorted(
        data['entries'],
        key=lambda e: dateparser.parse(e['startedDateTime']),
    )
    # Writing each chunk to file sequentially
    with open(output_file, 'wb') as f:
        for e in sorted_entries:
            body = e['response']['content'].get('text')
            if not body:
                # Nothing was recorded for this request; there is no chunk
                # to append.
                continue
            tmp = b64decode(body)
            # They added their custom header, let's strip that.
            marker = tmp.find(b'ftyp')
            # `find` returns -1 when the marker is absent; a negative slice
            # start would silently keep only the tail of the chunk, so fall
            # back to writing the whole chunk in that case.
            chunk_header_skip = marker - 4 if marker >= 4 else 0
            f.write(tmp[chunk_header_skip:])
def merge_audio_video(session_name, cleanup=True):
    """Mux the extracted audio and video streams into a single MKV file.

    Runs ``ffmpeg`` on ``{session_name}_video.m4v`` and
    ``{session_name}_audio.m4a`` and writes ``{session_name}_Final.mkv``.
    The streams are copied, not re-encoded: this is both faster and lossless
    compared to a re-encode. Note that the resolution might change within the
    video. VLC handles that well, other players are untested.

    Parameters
    ----------
    session_name : str
        Common prefix (optionally including a directory) of the input and
        output files.
    cleanup : bool, optional
        When True (default), the intermediate ``.m4a``/``.m4v`` files are
        removed after a successful merge. They are always kept when the merge
        fails, since they are needed to merge manually.

    Returns
    -------
    None
        If ``ffmpeg`` fails or cannot be started, a message showing the
        command to run manually is printed instead of raising.
    """
    import shlex  # Only needed here, to display a copy-pastable command.

    video_file = f"{session_name}_video.m4v"
    audio_file = f"{session_name}_audio.m4a"
    # Building the command as an argument list so that session names
    # containing spaces are passed to ffmpeg intact.
    args = [
        "ffmpeg",
        "-i", video_file,
        "-i", audio_file,
        "-c:v", "copy",
        "-c:a", "copy",
        f"{session_name}_Final.mkv",
    ]
    # Shell-quoted form of the same command, shown to the user on failure.
    cmd = " ".join(shlex.quote(arg) for arg in args)
    # Merging using FFMPEG
    try:
        subprocess.check_output(args)
    # In case of error (ffmpeg failed, or is not installed/executable),
    # let's tell the user to do it themselves.
    except (subprocess.CalledProcessError, OSError):
        print(f"\nAutomatic merging of audio and video failed.\nYou should now merge the audio and video manually, e.g. using FFMPEG:\n {cmd}")
        # Keep the intermediate files: the manual merge needs them.
        return
    if cleanup:  # Removing intermediate files we can easily recreate
        for intermediate in (audio_file, video_file):
            os.remove(intermediate)
def extract_session(session_name, merge_av=True, *args, **kwargs):
    """Turn the HAR dumps of one session into media files.

    Input files follow the naming convention ``{session_name}_{type}.har``,
    with ``type`` either ``audio`` or ``video``. They are converted to
    ``{session_name}_audio.m4a`` and ``{session_name}_video.m4v``
    respectively, audio first.

    Parameters
    ----------
    session_name : str
        Common prefix of the HAR files and of the files produced.
    merge_av : bool, optional
        When True (default), ``merge_audio_video`` is called afterwards.
    *args, **kwargs
        Forwarded unchanged to ``merge_audio_video``.
    """
    # (stream type, extension of the extracted file), in processing order.
    streams = (("audio", "m4a"), ("video", "m4v"))
    for stream_type, extension in streams:
        stem = f"{session_name}_{stream_type}"
        parse_har_file(f"{stem}.har", f"{stem}.{extension}")

    if not merge_av:
        return
    merge_audio_video(session_name, *args, **kwargs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment